1 /*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "intrinsics_x86.h"
18
19 #include <limits>
20
21 #include "arch/x86/instruction_set_features_x86.h"
22 #include "art_method.h"
23 #include "base/bit_utils.h"
24 #include "code_generator_x86.h"
25 #include "data_type-inl.h"
26 #include "entrypoints/quick/quick_entrypoints.h"
27 #include "heap_poisoning.h"
28 #include "intrinsics.h"
29 #include "intrinsics_utils.h"
30 #include "lock_word.h"
31 #include "mirror/array-inl.h"
32 #include "mirror/object_array-inl.h"
33 #include "mirror/reference.h"
34 #include "mirror/string.h"
35 #include "mirror/var_handle.h"
36 #include "scoped_thread_state_change-inl.h"
37 #include "thread-current-inl.h"
38 #include "utils/x86/assembler_x86.h"
39 #include "utils/x86/constants_x86.h"
40
41 namespace art {
42
43 namespace x86 {
44
IntrinsicLocationsBuilderX86(CodeGeneratorX86 * codegen)45 IntrinsicLocationsBuilderX86::IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen)
46 : allocator_(codegen->GetGraph()->GetAllocator()),
47 codegen_(codegen) {
48 }
49
50
GetAssembler()51 X86Assembler* IntrinsicCodeGeneratorX86::GetAssembler() {
52 return down_cast<X86Assembler*>(codegen_->GetAssembler());
53 }
54
GetAllocator()55 ArenaAllocator* IntrinsicCodeGeneratorX86::GetAllocator() {
56 return codegen_->GetGraph()->GetAllocator();
57 }
58
TryDispatch(HInvoke * invoke)59 bool IntrinsicLocationsBuilderX86::TryDispatch(HInvoke* invoke) {
60 Dispatch(invoke);
61 LocationSummary* res = invoke->GetLocations();
62 if (res == nullptr) {
63 return false;
64 }
65 return res->Intrinsified();
66 }
67
68 using IntrinsicSlowPathX86 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86>;
69
70 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
71 #define __ down_cast<X86Assembler*>(codegen->GetAssembler())-> // NOLINT
72
73 // Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
74 class ReadBarrierSystemArrayCopySlowPathX86 : public SlowPathCode {
75 public:
ReadBarrierSystemArrayCopySlowPathX86(HInstruction * instruction)76 explicit ReadBarrierSystemArrayCopySlowPathX86(HInstruction* instruction)
77 : SlowPathCode(instruction) {
78 DCHECK(kEmitCompilerReadBarrier);
79 DCHECK(kUseBakerReadBarrier);
80 }
81
EmitNativeCode(CodeGenerator * codegen)82 void EmitNativeCode(CodeGenerator* codegen) override {
83 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
84 LocationSummary* locations = instruction_->GetLocations();
85 DCHECK(locations->CanCall());
86 DCHECK(instruction_->IsInvokeStaticOrDirect())
87 << "Unexpected instruction in read barrier arraycopy slow path: "
88 << instruction_->DebugName();
89 DCHECK(instruction_->GetLocations()->Intrinsified());
90 DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
91
92 int32_t element_size = DataType::Size(DataType::Type::kReference);
93 uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
94
95 Register src = locations->InAt(0).AsRegister<Register>();
96 Location src_pos = locations->InAt(1);
97 Register dest = locations->InAt(2).AsRegister<Register>();
98 Location dest_pos = locations->InAt(3);
99 Location length = locations->InAt(4);
100 Location temp1_loc = locations->GetTemp(0);
101 Register temp1 = temp1_loc.AsRegister<Register>();
102 Register temp2 = locations->GetTemp(1).AsRegister<Register>();
103 Register temp3 = locations->GetTemp(2).AsRegister<Register>();
104
105 __ Bind(GetEntryLabel());
106 // In this code path, registers `temp1`, `temp2`, and `temp3`
107 // (resp.) are not used for the base source address, the base
108 // destination address, and the end source address (resp.), as in
109 // other SystemArrayCopy intrinsic code paths. Instead they are
110 // (resp.) used for:
111 // - the loop index (`i`);
112 // - the source index (`src_index`) and the loaded (source)
113 // reference (`value`); and
114 // - the destination index (`dest_index`).
115
116 // i = 0
117 __ xorl(temp1, temp1);
118 NearLabel loop;
119 __ Bind(&loop);
120 // value = src_array[i + src_pos]
121 if (src_pos.IsConstant()) {
122 int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
123 int32_t adjusted_offset = offset + constant * element_size;
124 __ movl(temp2, Address(src, temp1, ScaleFactor::TIMES_4, adjusted_offset));
125 } else {
126 __ leal(temp2, Address(src_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0));
127 __ movl(temp2, Address(src, temp2, ScaleFactor::TIMES_4, offset));
128 }
129 __ MaybeUnpoisonHeapReference(temp2);
130 // TODO: Inline the mark bit check before calling the runtime?
131 // value = ReadBarrier::Mark(value)
132 // No need to save live registers; it's taken care of by the
133 // entrypoint. Also, there is no need to update the stack mask,
134 // as this runtime call will not trigger a garbage collection.
135 // (See ReadBarrierMarkSlowPathX86::EmitNativeCode for more
136 // explanations.)
137 DCHECK_NE(temp2, ESP);
138 DCHECK(0 <= temp2 && temp2 < kNumberOfCpuRegisters) << temp2;
139 int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(temp2);
140 // This runtime call does not require a stack map.
141 x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
142 __ MaybePoisonHeapReference(temp2);
143 // dest_array[i + dest_pos] = value
144 if (dest_pos.IsConstant()) {
145 int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
146 int32_t adjusted_offset = offset + constant * element_size;
147 __ movl(Address(dest, temp1, ScaleFactor::TIMES_4, adjusted_offset), temp2);
148 } else {
149 __ leal(temp3, Address(dest_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0));
150 __ movl(Address(dest, temp3, ScaleFactor::TIMES_4, offset), temp2);
151 }
152 // ++i
153 __ addl(temp1, Immediate(1));
154 // if (i != length) goto loop
155 x86_codegen->GenerateIntCompare(temp1_loc, length);
156 __ j(kNotEqual, &loop);
157 __ jmp(GetExitLabel());
158 }
159
GetDescription() const160 const char* GetDescription() const override { return "ReadBarrierSystemArrayCopySlowPathX86"; }
161
162 private:
163 DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86);
164 };
165
166 #undef __
167
168 #define __ assembler->
169
CreateFPToIntLocations(ArenaAllocator * allocator,HInvoke * invoke,bool is64bit)170 static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is64bit) {
171 LocationSummary* locations =
172 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
173 locations->SetInAt(0, Location::RequiresFpuRegister());
174 locations->SetOut(Location::RequiresRegister());
175 if (is64bit) {
176 locations->AddTemp(Location::RequiresFpuRegister());
177 }
178 }
179
CreateIntToFPLocations(ArenaAllocator * allocator,HInvoke * invoke,bool is64bit)180 static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is64bit) {
181 LocationSummary* locations =
182 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
183 locations->SetInAt(0, Location::RequiresRegister());
184 locations->SetOut(Location::RequiresFpuRegister());
185 if (is64bit) {
186 locations->AddTemp(Location::RequiresFpuRegister());
187 locations->AddTemp(Location::RequiresFpuRegister());
188 }
189 }
190
MoveFPToInt(LocationSummary * locations,bool is64bit,X86Assembler * assembler)191 static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
192 Location input = locations->InAt(0);
193 Location output = locations->Out();
194 if (is64bit) {
195 // Need to use the temporary.
196 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
197 __ movsd(temp, input.AsFpuRegister<XmmRegister>());
198 __ movd(output.AsRegisterPairLow<Register>(), temp);
199 __ psrlq(temp, Immediate(32));
200 __ movd(output.AsRegisterPairHigh<Register>(), temp);
201 } else {
202 __ movd(output.AsRegister<Register>(), input.AsFpuRegister<XmmRegister>());
203 }
204 }
205
MoveIntToFP(LocationSummary * locations,bool is64bit,X86Assembler * assembler)206 static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
207 Location input = locations->InAt(0);
208 Location output = locations->Out();
209 if (is64bit) {
210 // Need to use the temporary.
211 XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
212 XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
213 __ movd(temp1, input.AsRegisterPairLow<Register>());
214 __ movd(temp2, input.AsRegisterPairHigh<Register>());
215 __ punpckldq(temp1, temp2);
216 __ movsd(output.AsFpuRegister<XmmRegister>(), temp1);
217 } else {
218 __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<Register>());
219 }
220 }
221
VisitDoubleDoubleToRawLongBits(HInvoke * invoke)222 void IntrinsicLocationsBuilderX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
223 CreateFPToIntLocations(allocator_, invoke, /* is64bit= */ true);
224 }
VisitDoubleLongBitsToDouble(HInvoke * invoke)225 void IntrinsicLocationsBuilderX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
226 CreateIntToFPLocations(allocator_, invoke, /* is64bit= */ true);
227 }
228
VisitDoubleDoubleToRawLongBits(HInvoke * invoke)229 void IntrinsicCodeGeneratorX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
230 MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
231 }
VisitDoubleLongBitsToDouble(HInvoke * invoke)232 void IntrinsicCodeGeneratorX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
233 MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
234 }
235
VisitFloatFloatToRawIntBits(HInvoke * invoke)236 void IntrinsicLocationsBuilderX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
237 CreateFPToIntLocations(allocator_, invoke, /* is64bit= */ false);
238 }
VisitFloatIntBitsToFloat(HInvoke * invoke)239 void IntrinsicLocationsBuilderX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
240 CreateIntToFPLocations(allocator_, invoke, /* is64bit= */ false);
241 }
242
VisitFloatFloatToRawIntBits(HInvoke * invoke)243 void IntrinsicCodeGeneratorX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
244 MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
245 }
VisitFloatIntBitsToFloat(HInvoke * invoke)246 void IntrinsicCodeGeneratorX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
247 MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
248 }
249
CreateIntToIntLocations(ArenaAllocator * allocator,HInvoke * invoke)250 static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
251 LocationSummary* locations =
252 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
253 locations->SetInAt(0, Location::RequiresRegister());
254 locations->SetOut(Location::SameAsFirstInput());
255 }
256
CreateLongToIntLocations(ArenaAllocator * allocator,HInvoke * invoke)257 static void CreateLongToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
258 LocationSummary* locations =
259 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
260 locations->SetInAt(0, Location::RequiresRegister());
261 locations->SetOut(Location::RequiresRegister());
262 }
263
CreateLongToLongLocations(ArenaAllocator * allocator,HInvoke * invoke)264 static void CreateLongToLongLocations(ArenaAllocator* allocator, HInvoke* invoke) {
265 LocationSummary* locations =
266 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
267 locations->SetInAt(0, Location::RequiresRegister());
268 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
269 }
270
GenReverseBytes(LocationSummary * locations,DataType::Type size,X86Assembler * assembler)271 static void GenReverseBytes(LocationSummary* locations,
272 DataType::Type size,
273 X86Assembler* assembler) {
274 Register out = locations->Out().AsRegister<Register>();
275
276 switch (size) {
277 case DataType::Type::kInt16:
278 // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
279 __ bswapl(out);
280 __ sarl(out, Immediate(16));
281 break;
282 case DataType::Type::kInt32:
283 __ bswapl(out);
284 break;
285 default:
286 LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
287 UNREACHABLE();
288 }
289 }
290
VisitIntegerReverseBytes(HInvoke * invoke)291 void IntrinsicLocationsBuilderX86::VisitIntegerReverseBytes(HInvoke* invoke) {
292 CreateIntToIntLocations(allocator_, invoke);
293 }
294
VisitIntegerReverseBytes(HInvoke * invoke)295 void IntrinsicCodeGeneratorX86::VisitIntegerReverseBytes(HInvoke* invoke) {
296 GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
297 }
298
VisitLongReverseBytes(HInvoke * invoke)299 void IntrinsicLocationsBuilderX86::VisitLongReverseBytes(HInvoke* invoke) {
300 CreateLongToLongLocations(allocator_, invoke);
301 }
302
VisitLongReverseBytes(HInvoke * invoke)303 void IntrinsicCodeGeneratorX86::VisitLongReverseBytes(HInvoke* invoke) {
304 LocationSummary* locations = invoke->GetLocations();
305 Location input = locations->InAt(0);
306 Register input_lo = input.AsRegisterPairLow<Register>();
307 Register input_hi = input.AsRegisterPairHigh<Register>();
308 Location output = locations->Out();
309 Register output_lo = output.AsRegisterPairLow<Register>();
310 Register output_hi = output.AsRegisterPairHigh<Register>();
311
312 X86Assembler* assembler = GetAssembler();
313 // Assign the inputs to the outputs, mixing low/high.
314 __ movl(output_lo, input_hi);
315 __ movl(output_hi, input_lo);
316 __ bswapl(output_lo);
317 __ bswapl(output_hi);
318 }
319
VisitShortReverseBytes(HInvoke * invoke)320 void IntrinsicLocationsBuilderX86::VisitShortReverseBytes(HInvoke* invoke) {
321 CreateIntToIntLocations(allocator_, invoke);
322 }
323
VisitShortReverseBytes(HInvoke * invoke)324 void IntrinsicCodeGeneratorX86::VisitShortReverseBytes(HInvoke* invoke) {
325 GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
326 }
327
CreateFPToFPLocations(ArenaAllocator * allocator,HInvoke * invoke)328 static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
329 LocationSummary* locations =
330 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
331 locations->SetInAt(0, Location::RequiresFpuRegister());
332 locations->SetOut(Location::RequiresFpuRegister());
333 }
334
VisitMathSqrt(HInvoke * invoke)335 void IntrinsicLocationsBuilderX86::VisitMathSqrt(HInvoke* invoke) {
336 CreateFPToFPLocations(allocator_, invoke);
337 }
338
VisitMathSqrt(HInvoke * invoke)339 void IntrinsicCodeGeneratorX86::VisitMathSqrt(HInvoke* invoke) {
340 LocationSummary* locations = invoke->GetLocations();
341 XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
342 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
343
344 GetAssembler()->sqrtsd(out, in);
345 }
346
CreateSSE41FPToFPLocations(ArenaAllocator * allocator,HInvoke * invoke,CodeGeneratorX86 * codegen)347 static void CreateSSE41FPToFPLocations(ArenaAllocator* allocator,
348 HInvoke* invoke,
349 CodeGeneratorX86* codegen) {
350 // Do we have instruction support?
351 if (!codegen->GetInstructionSetFeatures().HasSSE4_1()) {
352 return;
353 }
354
355 CreateFPToFPLocations(allocator, invoke);
356 }
357
GenSSE41FPToFPIntrinsic(HInvoke * invoke,X86Assembler * assembler,int round_mode)358 static void GenSSE41FPToFPIntrinsic(HInvoke* invoke, X86Assembler* assembler, int round_mode) {
359 LocationSummary* locations = invoke->GetLocations();
360 DCHECK(!locations->WillCall());
361 XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
362 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
363 __ roundsd(out, in, Immediate(round_mode));
364 }
365
VisitMathCeil(HInvoke * invoke)366 void IntrinsicLocationsBuilderX86::VisitMathCeil(HInvoke* invoke) {
367 CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
368 }
369
VisitMathCeil(HInvoke * invoke)370 void IntrinsicCodeGeneratorX86::VisitMathCeil(HInvoke* invoke) {
371 GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 2);
372 }
373
VisitMathFloor(HInvoke * invoke)374 void IntrinsicLocationsBuilderX86::VisitMathFloor(HInvoke* invoke) {
375 CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
376 }
377
VisitMathFloor(HInvoke * invoke)378 void IntrinsicCodeGeneratorX86::VisitMathFloor(HInvoke* invoke) {
379 GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 1);
380 }
381
VisitMathRint(HInvoke * invoke)382 void IntrinsicLocationsBuilderX86::VisitMathRint(HInvoke* invoke) {
383 CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
384 }
385
VisitMathRint(HInvoke * invoke)386 void IntrinsicCodeGeneratorX86::VisitMathRint(HInvoke* invoke) {
387 GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 0);
388 }
389
VisitMathRoundFloat(HInvoke * invoke)390 void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) {
391 // Do we have instruction support?
392 if (!codegen_->GetInstructionSetFeatures().HasSSE4_1()) {
393 return;
394 }
395
396 HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
397 DCHECK(static_or_direct != nullptr);
398 LocationSummary* locations =
399 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
400 locations->SetInAt(0, Location::RequiresFpuRegister());
401 if (static_or_direct->HasSpecialInput() &&
402 invoke->InputAt(
403 static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
404 locations->SetInAt(1, Location::RequiresRegister());
405 }
406 locations->SetOut(Location::RequiresRegister());
407 locations->AddTemp(Location::RequiresFpuRegister());
408 locations->AddTemp(Location::RequiresFpuRegister());
409 }
410
VisitMathRoundFloat(HInvoke * invoke)411 void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) {
412 LocationSummary* locations = invoke->GetLocations();
413 DCHECK(!locations->WillCall());
414
415 XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
416 XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
417 XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
418 Register out = locations->Out().AsRegister<Register>();
419 NearLabel skip_incr, done;
420 X86Assembler* assembler = GetAssembler();
421
422 // Since no direct x86 rounding instruction matches the required semantics,
423 // this intrinsic is implemented as follows:
424 // result = floor(in);
425 // if (in - result >= 0.5f)
426 // result = result + 1.0f;
427 __ movss(t2, in);
428 __ roundss(t1, in, Immediate(1));
429 __ subss(t2, t1);
430 if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) {
431 // Direct constant area available.
432 HX86ComputeBaseMethodAddress* method_address =
433 invoke->InputAt(1)->AsX86ComputeBaseMethodAddress();
434 Register constant_area = locations->InAt(1).AsRegister<Register>();
435 __ comiss(t2, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(0.5f),
436 method_address,
437 constant_area));
438 __ j(kBelow, &skip_incr);
439 __ addss(t1, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(1.0f),
440 method_address,
441 constant_area));
442 __ Bind(&skip_incr);
443 } else {
444 // No constant area: go through stack.
445 __ pushl(Immediate(bit_cast<int32_t, float>(0.5f)));
446 __ pushl(Immediate(bit_cast<int32_t, float>(1.0f)));
447 __ comiss(t2, Address(ESP, 4));
448 __ j(kBelow, &skip_incr);
449 __ addss(t1, Address(ESP, 0));
450 __ Bind(&skip_incr);
451 __ addl(ESP, Immediate(8));
452 }
453
454 // Final conversion to an integer. Unfortunately this also does not have a
455 // direct x86 instruction, since NaN should map to 0 and large positive
456 // values need to be clipped to the extreme value.
457 __ movl(out, Immediate(kPrimIntMax));
458 __ cvtsi2ss(t2, out);
459 __ comiss(t1, t2);
460 __ j(kAboveEqual, &done); // clipped to max (already in out), does not jump on unordered
461 __ movl(out, Immediate(0)); // does not change flags
462 __ j(kUnordered, &done); // NaN mapped to 0 (just moved in out)
463 __ cvttss2si(out, t1);
464 __ Bind(&done);
465 }
466
CreateFPToFPCallLocations(ArenaAllocator * allocator,HInvoke * invoke)467 static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
468 LocationSummary* locations =
469 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
470 InvokeRuntimeCallingConvention calling_convention;
471 locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
472 locations->SetOut(Location::FpuRegisterLocation(XMM0));
473 }
474
GenFPToFPCall(HInvoke * invoke,CodeGeneratorX86 * codegen,QuickEntrypointEnum entry)475 static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86* codegen, QuickEntrypointEnum entry) {
476 LocationSummary* locations = invoke->GetLocations();
477 DCHECK(locations->WillCall());
478 DCHECK(invoke->IsInvokeStaticOrDirect());
479 X86Assembler* assembler = codegen->GetAssembler();
480
481 // We need some place to pass the parameters.
482 __ subl(ESP, Immediate(16));
483 __ cfi().AdjustCFAOffset(16);
484
485 // Pass the parameters at the bottom of the stack.
486 __ movsd(Address(ESP, 0), XMM0);
487
488 // If we have a second parameter, pass it next.
489 if (invoke->GetNumberOfArguments() == 2) {
490 __ movsd(Address(ESP, 8), XMM1);
491 }
492
493 // Now do the actual call.
494 codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
495
496 // Extract the return value from the FP stack.
497 __ fstpl(Address(ESP, 0));
498 __ movsd(XMM0, Address(ESP, 0));
499
500 // And clean up the stack.
501 __ addl(ESP, Immediate(16));
502 __ cfi().AdjustCFAOffset(-16);
503 }
504
CreateLowestOneBitLocations(ArenaAllocator * allocator,bool is_long,HInvoke * invoke)505 static void CreateLowestOneBitLocations(ArenaAllocator* allocator, bool is_long, HInvoke* invoke) {
506 LocationSummary* locations =
507 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
508 if (is_long) {
509 locations->SetInAt(0, Location::RequiresRegister());
510 } else {
511 locations->SetInAt(0, Location::Any());
512 }
513 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
514 }
515
GenLowestOneBit(X86Assembler * assembler,CodeGeneratorX86 * codegen,bool is_long,HInvoke * invoke)516 static void GenLowestOneBit(X86Assembler* assembler,
517 CodeGeneratorX86* codegen,
518 bool is_long,
519 HInvoke* invoke) {
520 LocationSummary* locations = invoke->GetLocations();
521 Location src = locations->InAt(0);
522 Location out_loc = locations->Out();
523
524 if (invoke->InputAt(0)->IsConstant()) {
525 // Evaluate this at compile time.
526 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
527 if (value == 0) {
528 if (is_long) {
529 __ xorl(out_loc.AsRegisterPairLow<Register>(), out_loc.AsRegisterPairLow<Register>());
530 __ xorl(out_loc.AsRegisterPairHigh<Register>(), out_loc.AsRegisterPairHigh<Register>());
531 } else {
532 __ xorl(out_loc.AsRegister<Register>(), out_loc.AsRegister<Register>());
533 }
534 return;
535 }
536 // Nonzero value.
537 value = is_long ? CTZ(static_cast<uint64_t>(value))
538 : CTZ(static_cast<uint32_t>(value));
539 if (is_long) {
540 if (value >= 32) {
541 int shift = value-32;
542 codegen->Load32BitValue(out_loc.AsRegisterPairLow<Register>(), 0);
543 codegen->Load32BitValue(out_loc.AsRegisterPairHigh<Register>(), 1 << shift);
544 } else {
545 codegen->Load32BitValue(out_loc.AsRegisterPairLow<Register>(), 1 << value);
546 codegen->Load32BitValue(out_loc.AsRegisterPairHigh<Register>(), 0);
547 }
548 } else {
549 codegen->Load32BitValue(out_loc.AsRegister<Register>(), 1 << value);
550 }
551 return;
552 }
553 // Handle non constant case
554 if (is_long) {
555 DCHECK(src.IsRegisterPair());
556 Register src_lo = src.AsRegisterPairLow<Register>();
557 Register src_hi = src.AsRegisterPairHigh<Register>();
558
559 Register out_lo = out_loc.AsRegisterPairLow<Register>();
560 Register out_hi = out_loc.AsRegisterPairHigh<Register>();
561
562 __ movl(out_lo, src_lo);
563 __ movl(out_hi, src_hi);
564
565 __ negl(out_lo);
566 __ adcl(out_hi, Immediate(0));
567 __ negl(out_hi);
568
569 __ andl(out_lo, src_lo);
570 __ andl(out_hi, src_hi);
571 } else {
572 if (codegen->GetInstructionSetFeatures().HasAVX2() && src.IsRegister()) {
573 Register out = out_loc.AsRegister<Register>();
574 __ blsi(out, src.AsRegister<Register>());
575 } else {
576 Register out = out_loc.AsRegister<Register>();
577 // Do tmp & -tmp
578 if (src.IsRegister()) {
579 __ movl(out, src.AsRegister<Register>());
580 } else {
581 DCHECK(src.IsStackSlot());
582 __ movl(out, Address(ESP, src.GetStackIndex()));
583 }
584 __ negl(out);
585
586 if (src.IsRegister()) {
587 __ andl(out, src.AsRegister<Register>());
588 } else {
589 __ andl(out, Address(ESP, src.GetStackIndex()));
590 }
591 }
592 }
593 }
594
VisitMathCos(HInvoke * invoke)595 void IntrinsicLocationsBuilderX86::VisitMathCos(HInvoke* invoke) {
596 CreateFPToFPCallLocations(allocator_, invoke);
597 }
598
VisitMathCos(HInvoke * invoke)599 void IntrinsicCodeGeneratorX86::VisitMathCos(HInvoke* invoke) {
600 GenFPToFPCall(invoke, codegen_, kQuickCos);
601 }
602
VisitMathSin(HInvoke * invoke)603 void IntrinsicLocationsBuilderX86::VisitMathSin(HInvoke* invoke) {
604 CreateFPToFPCallLocations(allocator_, invoke);
605 }
606
VisitMathSin(HInvoke * invoke)607 void IntrinsicCodeGeneratorX86::VisitMathSin(HInvoke* invoke) {
608 GenFPToFPCall(invoke, codegen_, kQuickSin);
609 }
610
VisitMathAcos(HInvoke * invoke)611 void IntrinsicLocationsBuilderX86::VisitMathAcos(HInvoke* invoke) {
612 CreateFPToFPCallLocations(allocator_, invoke);
613 }
614
VisitMathAcos(HInvoke * invoke)615 void IntrinsicCodeGeneratorX86::VisitMathAcos(HInvoke* invoke) {
616 GenFPToFPCall(invoke, codegen_, kQuickAcos);
617 }
618
VisitMathAsin(HInvoke * invoke)619 void IntrinsicLocationsBuilderX86::VisitMathAsin(HInvoke* invoke) {
620 CreateFPToFPCallLocations(allocator_, invoke);
621 }
622
VisitMathAsin(HInvoke * invoke)623 void IntrinsicCodeGeneratorX86::VisitMathAsin(HInvoke* invoke) {
624 GenFPToFPCall(invoke, codegen_, kQuickAsin);
625 }
626
VisitMathAtan(HInvoke * invoke)627 void IntrinsicLocationsBuilderX86::VisitMathAtan(HInvoke* invoke) {
628 CreateFPToFPCallLocations(allocator_, invoke);
629 }
630
VisitMathAtan(HInvoke * invoke)631 void IntrinsicCodeGeneratorX86::VisitMathAtan(HInvoke* invoke) {
632 GenFPToFPCall(invoke, codegen_, kQuickAtan);
633 }
634
VisitMathCbrt(HInvoke * invoke)635 void IntrinsicLocationsBuilderX86::VisitMathCbrt(HInvoke* invoke) {
636 CreateFPToFPCallLocations(allocator_, invoke);
637 }
638
VisitMathCbrt(HInvoke * invoke)639 void IntrinsicCodeGeneratorX86::VisitMathCbrt(HInvoke* invoke) {
640 GenFPToFPCall(invoke, codegen_, kQuickCbrt);
641 }
642
VisitMathCosh(HInvoke * invoke)643 void IntrinsicLocationsBuilderX86::VisitMathCosh(HInvoke* invoke) {
644 CreateFPToFPCallLocations(allocator_, invoke);
645 }
646
VisitMathCosh(HInvoke * invoke)647 void IntrinsicCodeGeneratorX86::VisitMathCosh(HInvoke* invoke) {
648 GenFPToFPCall(invoke, codegen_, kQuickCosh);
649 }
650
VisitMathExp(HInvoke * invoke)651 void IntrinsicLocationsBuilderX86::VisitMathExp(HInvoke* invoke) {
652 CreateFPToFPCallLocations(allocator_, invoke);
653 }
654
VisitMathExp(HInvoke * invoke)655 void IntrinsicCodeGeneratorX86::VisitMathExp(HInvoke* invoke) {
656 GenFPToFPCall(invoke, codegen_, kQuickExp);
657 }
658
VisitMathExpm1(HInvoke * invoke)659 void IntrinsicLocationsBuilderX86::VisitMathExpm1(HInvoke* invoke) {
660 CreateFPToFPCallLocations(allocator_, invoke);
661 }
662
VisitMathExpm1(HInvoke * invoke)663 void IntrinsicCodeGeneratorX86::VisitMathExpm1(HInvoke* invoke) {
664 GenFPToFPCall(invoke, codegen_, kQuickExpm1);
665 }
666
VisitMathLog(HInvoke * invoke)667 void IntrinsicLocationsBuilderX86::VisitMathLog(HInvoke* invoke) {
668 CreateFPToFPCallLocations(allocator_, invoke);
669 }
670
VisitMathLog(HInvoke * invoke)671 void IntrinsicCodeGeneratorX86::VisitMathLog(HInvoke* invoke) {
672 GenFPToFPCall(invoke, codegen_, kQuickLog);
673 }
674
VisitMathLog10(HInvoke * invoke)675 void IntrinsicLocationsBuilderX86::VisitMathLog10(HInvoke* invoke) {
676 CreateFPToFPCallLocations(allocator_, invoke);
677 }
678
VisitMathLog10(HInvoke * invoke)679 void IntrinsicCodeGeneratorX86::VisitMathLog10(HInvoke* invoke) {
680 GenFPToFPCall(invoke, codegen_, kQuickLog10);
681 }
682
VisitMathSinh(HInvoke * invoke)683 void IntrinsicLocationsBuilderX86::VisitMathSinh(HInvoke* invoke) {
684 CreateFPToFPCallLocations(allocator_, invoke);
685 }
686
VisitMathSinh(HInvoke * invoke)687 void IntrinsicCodeGeneratorX86::VisitMathSinh(HInvoke* invoke) {
688 GenFPToFPCall(invoke, codegen_, kQuickSinh);
689 }
690
VisitMathTan(HInvoke * invoke)691 void IntrinsicLocationsBuilderX86::VisitMathTan(HInvoke* invoke) {
692 CreateFPToFPCallLocations(allocator_, invoke);
693 }
694
VisitMathTan(HInvoke * invoke)695 void IntrinsicCodeGeneratorX86::VisitMathTan(HInvoke* invoke) {
696 GenFPToFPCall(invoke, codegen_, kQuickTan);
697 }
698
VisitMathTanh(HInvoke * invoke)699 void IntrinsicLocationsBuilderX86::VisitMathTanh(HInvoke* invoke) {
700 CreateFPToFPCallLocations(allocator_, invoke);
701 }
702
VisitMathTanh(HInvoke * invoke)703 void IntrinsicCodeGeneratorX86::VisitMathTanh(HInvoke* invoke) {
704 GenFPToFPCall(invoke, codegen_, kQuickTanh);
705 }
706
VisitIntegerLowestOneBit(HInvoke * invoke)707 void IntrinsicLocationsBuilderX86::VisitIntegerLowestOneBit(HInvoke* invoke) {
708 CreateLowestOneBitLocations(allocator_, /*is_long=*/ false, invoke);
709 }
VisitIntegerLowestOneBit(HInvoke * invoke)710 void IntrinsicCodeGeneratorX86::VisitIntegerLowestOneBit(HInvoke* invoke) {
711 GenLowestOneBit(GetAssembler(), codegen_, /*is_long=*/ false, invoke);
712 }
713
VisitLongLowestOneBit(HInvoke * invoke)714 void IntrinsicLocationsBuilderX86::VisitLongLowestOneBit(HInvoke* invoke) {
715 CreateLowestOneBitLocations(allocator_, /*is_long=*/ true, invoke);
716 }
717
VisitLongLowestOneBit(HInvoke * invoke)718 void IntrinsicCodeGeneratorX86::VisitLongLowestOneBit(HInvoke* invoke) {
719 GenLowestOneBit(GetAssembler(), codegen_, /*is_long=*/ true, invoke);
720 }
721
CreateFPFPToFPCallLocations(ArenaAllocator * allocator,HInvoke * invoke)722 static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
723 LocationSummary* locations =
724 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
725 InvokeRuntimeCallingConvention calling_convention;
726 locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
727 locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
728 locations->SetOut(Location::FpuRegisterLocation(XMM0));
729 }
730
VisitMathAtan2(HInvoke * invoke)731 void IntrinsicLocationsBuilderX86::VisitMathAtan2(HInvoke* invoke) {
732 CreateFPFPToFPCallLocations(allocator_, invoke);
733 }
734
VisitMathAtan2(HInvoke * invoke)735 void IntrinsicCodeGeneratorX86::VisitMathAtan2(HInvoke* invoke) {
736 GenFPToFPCall(invoke, codegen_, kQuickAtan2);
737 }
738
VisitMathPow(HInvoke * invoke)739 void IntrinsicLocationsBuilderX86::VisitMathPow(HInvoke* invoke) {
740 CreateFPFPToFPCallLocations(allocator_, invoke);
741 }
742
VisitMathPow(HInvoke * invoke)743 void IntrinsicCodeGeneratorX86::VisitMathPow(HInvoke* invoke) {
744 GenFPToFPCall(invoke, codegen_, kQuickPow);
745 }
746
VisitMathHypot(HInvoke * invoke)747 void IntrinsicLocationsBuilderX86::VisitMathHypot(HInvoke* invoke) {
748 CreateFPFPToFPCallLocations(allocator_, invoke);
749 }
750
VisitMathHypot(HInvoke * invoke)751 void IntrinsicCodeGeneratorX86::VisitMathHypot(HInvoke* invoke) {
752 GenFPToFPCall(invoke, codegen_, kQuickHypot);
753 }
754
VisitMathNextAfter(HInvoke * invoke)755 void IntrinsicLocationsBuilderX86::VisitMathNextAfter(HInvoke* invoke) {
756 CreateFPFPToFPCallLocations(allocator_, invoke);
757 }
758
VisitMathNextAfter(HInvoke * invoke)759 void IntrinsicCodeGeneratorX86::VisitMathNextAfter(HInvoke* invoke) {
760 GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
761 }
762
VisitSystemArrayCopyChar(HInvoke * invoke)763 void IntrinsicLocationsBuilderX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
764 // We need at least two of the positions or length to be an integer constant,
765 // or else we won't have enough free registers.
766 HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
767 HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
768 HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
769
770 int num_constants =
771 ((src_pos != nullptr) ? 1 : 0)
772 + ((dest_pos != nullptr) ? 1 : 0)
773 + ((length != nullptr) ? 1 : 0);
774
775 if (num_constants < 2) {
776 // Not enough free registers.
777 return;
778 }
779
780 // As long as we are checking, we might as well check to see if the src and dest
781 // positions are >= 0.
782 if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
783 (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
784 // We will have to fail anyways.
785 return;
786 }
787
788 // And since we are already checking, check the length too.
789 if (length != nullptr) {
790 int32_t len = length->GetValue();
791 if (len < 0) {
792 // Just call as normal.
793 return;
794 }
795 }
796
797 // Okay, it is safe to generate inline code.
798 LocationSummary* locations =
799 new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
800 // arraycopy(Object src, int srcPos, Object dest, int destPos, int length).
801 locations->SetInAt(0, Location::RequiresRegister());
802 locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
803 locations->SetInAt(2, Location::RequiresRegister());
804 locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
805 locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));
806
807 // And we need some temporaries. We will use REP MOVSW, so we need fixed registers.
808 locations->AddTemp(Location::RegisterLocation(ESI));
809 locations->AddTemp(Location::RegisterLocation(EDI));
810 locations->AddTemp(Location::RegisterLocation(ECX));
811 }
812
CheckPosition(X86Assembler * assembler,Location pos,Register input,Location length,SlowPathCode * slow_path,Register temp,bool length_is_input_length=false)813 static void CheckPosition(X86Assembler* assembler,
814 Location pos,
815 Register input,
816 Location length,
817 SlowPathCode* slow_path,
818 Register temp,
819 bool length_is_input_length = false) {
820 // Where is the length in the Array?
821 const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
822
823 if (pos.IsConstant()) {
824 int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
825 if (pos_const == 0) {
826 if (!length_is_input_length) {
827 // Check that length(input) >= length.
828 if (length.IsConstant()) {
829 __ cmpl(Address(input, length_offset),
830 Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
831 } else {
832 __ cmpl(Address(input, length_offset), length.AsRegister<Register>());
833 }
834 __ j(kLess, slow_path->GetEntryLabel());
835 }
836 } else {
837 // Check that length(input) >= pos.
838 __ movl(temp, Address(input, length_offset));
839 __ subl(temp, Immediate(pos_const));
840 __ j(kLess, slow_path->GetEntryLabel());
841
842 // Check that (length(input) - pos) >= length.
843 if (length.IsConstant()) {
844 __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
845 } else {
846 __ cmpl(temp, length.AsRegister<Register>());
847 }
848 __ j(kLess, slow_path->GetEntryLabel());
849 }
850 } else if (length_is_input_length) {
851 // The only way the copy can succeed is if pos is zero.
852 Register pos_reg = pos.AsRegister<Register>();
853 __ testl(pos_reg, pos_reg);
854 __ j(kNotEqual, slow_path->GetEntryLabel());
855 } else {
856 // Check that pos >= 0.
857 Register pos_reg = pos.AsRegister<Register>();
858 __ testl(pos_reg, pos_reg);
859 __ j(kLess, slow_path->GetEntryLabel());
860
861 // Check that pos <= length(input).
862 __ cmpl(Address(input, length_offset), pos_reg);
863 __ j(kLess, slow_path->GetEntryLabel());
864
865 // Check that (length(input) - pos) >= length.
866 __ movl(temp, Address(input, length_offset));
867 __ subl(temp, pos_reg);
868 if (length.IsConstant()) {
869 __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
870 } else {
871 __ cmpl(temp, length.AsRegister<Register>());
872 }
873 __ j(kLess, slow_path->GetEntryLabel());
874 }
875 }
876
VisitSystemArrayCopyChar(HInvoke * invoke)877 void IntrinsicCodeGeneratorX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
878 X86Assembler* assembler = GetAssembler();
879 LocationSummary* locations = invoke->GetLocations();
880
881 Register src = locations->InAt(0).AsRegister<Register>();
882 Location srcPos = locations->InAt(1);
883 Register dest = locations->InAt(2).AsRegister<Register>();
884 Location destPos = locations->InAt(3);
885 Location length = locations->InAt(4);
886
887 // Temporaries that we need for MOVSW.
888 Register src_base = locations->GetTemp(0).AsRegister<Register>();
889 DCHECK_EQ(src_base, ESI);
890 Register dest_base = locations->GetTemp(1).AsRegister<Register>();
891 DCHECK_EQ(dest_base, EDI);
892 Register count = locations->GetTemp(2).AsRegister<Register>();
893 DCHECK_EQ(count, ECX);
894
895 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
896 codegen_->AddSlowPath(slow_path);
897
898 // Bail out if the source and destination are the same (to handle overlap).
899 __ cmpl(src, dest);
900 __ j(kEqual, slow_path->GetEntryLabel());
901
902 // Bail out if the source is null.
903 __ testl(src, src);
904 __ j(kEqual, slow_path->GetEntryLabel());
905
906 // Bail out if the destination is null.
907 __ testl(dest, dest);
908 __ j(kEqual, slow_path->GetEntryLabel());
909
910 // If the length is negative, bail out.
911 // We have already checked in the LocationsBuilder for the constant case.
912 if (!length.IsConstant()) {
913 __ cmpl(length.AsRegister<Register>(), length.AsRegister<Register>());
914 __ j(kLess, slow_path->GetEntryLabel());
915 }
916
917 // We need the count in ECX.
918 if (length.IsConstant()) {
919 __ movl(count, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
920 } else {
921 __ movl(count, length.AsRegister<Register>());
922 }
923
924 // Validity checks: source. Use src_base as a temporary register.
925 CheckPosition(assembler, srcPos, src, Location::RegisterLocation(count), slow_path, src_base);
926
927 // Validity checks: dest. Use src_base as a temporary register.
928 CheckPosition(assembler, destPos, dest, Location::RegisterLocation(count), slow_path, src_base);
929
930 // Okay, everything checks out. Finally time to do the copy.
931 // Check assumption that sizeof(Char) is 2 (used in scaling below).
932 const size_t char_size = DataType::Size(DataType::Type::kUint16);
933 DCHECK_EQ(char_size, 2u);
934
935 const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
936
937 if (srcPos.IsConstant()) {
938 int32_t srcPos_const = srcPos.GetConstant()->AsIntConstant()->GetValue();
939 __ leal(src_base, Address(src, char_size * srcPos_const + data_offset));
940 } else {
941 __ leal(src_base, Address(src, srcPos.AsRegister<Register>(),
942 ScaleFactor::TIMES_2, data_offset));
943 }
944 if (destPos.IsConstant()) {
945 int32_t destPos_const = destPos.GetConstant()->AsIntConstant()->GetValue();
946
947 __ leal(dest_base, Address(dest, char_size * destPos_const + data_offset));
948 } else {
949 __ leal(dest_base, Address(dest, destPos.AsRegister<Register>(),
950 ScaleFactor::TIMES_2, data_offset));
951 }
952
953 // Do the move.
954 __ rep_movsw();
955
956 __ Bind(slow_path->GetExitLabel());
957 }
958
VisitStringCompareTo(HInvoke * invoke)959 void IntrinsicLocationsBuilderX86::VisitStringCompareTo(HInvoke* invoke) {
960 // The inputs plus one temp.
961 LocationSummary* locations = new (allocator_) LocationSummary(
962 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
963 InvokeRuntimeCallingConvention calling_convention;
964 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
965 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
966 locations->SetOut(Location::RegisterLocation(EAX));
967 }
968
VisitStringCompareTo(HInvoke * invoke)969 void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) {
970 X86Assembler* assembler = GetAssembler();
971 LocationSummary* locations = invoke->GetLocations();
972
973 // Note that the null check must have been done earlier.
974 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
975
976 Register argument = locations->InAt(1).AsRegister<Register>();
977 __ testl(argument, argument);
978 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
979 codegen_->AddSlowPath(slow_path);
980 __ j(kEqual, slow_path->GetEntryLabel());
981
982 codegen_->InvokeRuntime(kQuickStringCompareTo, invoke, invoke->GetDexPc(), slow_path);
983 __ Bind(slow_path->GetExitLabel());
984 }
985
VisitStringEquals(HInvoke * invoke)986 void IntrinsicLocationsBuilderX86::VisitStringEquals(HInvoke* invoke) {
987 LocationSummary* locations =
988 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
989 locations->SetInAt(0, Location::RequiresRegister());
990 locations->SetInAt(1, Location::RequiresRegister());
991
992 // Request temporary registers, ECX and EDI needed for repe_cmpsl instruction.
993 locations->AddTemp(Location::RegisterLocation(ECX));
994 locations->AddTemp(Location::RegisterLocation(EDI));
995
996 // Set output, ESI needed for repe_cmpsl instruction anyways.
997 locations->SetOut(Location::RegisterLocation(ESI), Location::kOutputOverlap);
998 }
999
VisitStringEquals(HInvoke * invoke)1000 void IntrinsicCodeGeneratorX86::VisitStringEquals(HInvoke* invoke) {
1001 X86Assembler* assembler = GetAssembler();
1002 LocationSummary* locations = invoke->GetLocations();
1003
1004 Register str = locations->InAt(0).AsRegister<Register>();
1005 Register arg = locations->InAt(1).AsRegister<Register>();
1006 Register ecx = locations->GetTemp(0).AsRegister<Register>();
1007 Register edi = locations->GetTemp(1).AsRegister<Register>();
1008 Register esi = locations->Out().AsRegister<Register>();
1009
1010 NearLabel end, return_true, return_false;
1011
1012 // Get offsets of count, value, and class fields within a string object.
1013 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1014 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1015 const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
1016
1017 // Note that the null check must have been done earlier.
1018 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1019
1020 StringEqualsOptimizations optimizations(invoke);
1021 if (!optimizations.GetArgumentNotNull()) {
1022 // Check if input is null, return false if it is.
1023 __ testl(arg, arg);
1024 __ j(kEqual, &return_false);
1025 }
1026
1027 if (!optimizations.GetArgumentIsString()) {
1028 // Instanceof check for the argument by comparing class fields.
1029 // All string objects must have the same type since String cannot be subclassed.
1030 // Receiver must be a string object, so its class field is equal to all strings' class fields.
1031 // If the argument is a string object, its class field must be equal to receiver's class field.
1032 //
1033 // As the String class is expected to be non-movable, we can read the class
1034 // field from String.equals' arguments without read barriers.
1035 AssertNonMovableStringClass();
1036 // Also, because we use the loaded class references only to compare them, we
1037 // don't need to unpoison them.
1038 // /* HeapReference<Class> */ ecx = str->klass_
1039 __ movl(ecx, Address(str, class_offset));
1040 // if (ecx != /* HeapReference<Class> */ arg->klass_) return false
1041 __ cmpl(ecx, Address(arg, class_offset));
1042 __ j(kNotEqual, &return_false);
1043 }
1044
1045 // Reference equality check, return true if same reference.
1046 __ cmpl(str, arg);
1047 __ j(kEqual, &return_true);
1048
1049 // Load length and compression flag of receiver string.
1050 __ movl(ecx, Address(str, count_offset));
1051 // Check if lengths and compression flags are equal, return false if they're not.
1052 // Two identical strings will always have same compression style since
1053 // compression style is decided on alloc.
1054 __ cmpl(ecx, Address(arg, count_offset));
1055 __ j(kNotEqual, &return_false);
1056 // Return true if strings are empty. Even with string compression `count == 0` means empty.
1057 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1058 "Expecting 0=compressed, 1=uncompressed");
1059 __ jecxz(&return_true);
1060
1061 if (mirror::kUseStringCompression) {
1062 NearLabel string_uncompressed;
1063 // Extract length and differentiate between both compressed or both uncompressed.
1064 // Different compression style is cut above.
1065 __ shrl(ecx, Immediate(1));
1066 __ j(kCarrySet, &string_uncompressed);
1067 // Divide string length by 2, rounding up, and continue as if uncompressed.
1068 __ addl(ecx, Immediate(1));
1069 __ shrl(ecx, Immediate(1));
1070 __ Bind(&string_uncompressed);
1071 }
1072 // Load starting addresses of string values into ESI/EDI as required for repe_cmpsl instruction.
1073 __ leal(esi, Address(str, value_offset));
1074 __ leal(edi, Address(arg, value_offset));
1075
1076 // Divide string length by 2 to compare characters 2 at a time and adjust for lengths not
1077 // divisible by 2.
1078 __ addl(ecx, Immediate(1));
1079 __ shrl(ecx, Immediate(1));
1080
1081 // Assertions that must hold in order to compare strings 2 characters (uncompressed)
1082 // or 4 characters (compressed) at a time.
1083 DCHECK_ALIGNED(value_offset, 4);
1084 static_assert(IsAligned<4>(kObjectAlignment), "String of odd length is not zero padded");
1085
1086 // Loop to compare strings two characters at a time starting at the beginning of the string.
1087 __ repe_cmpsl();
1088 // If strings are not equal, zero flag will be cleared.
1089 __ j(kNotEqual, &return_false);
1090
1091 // Return true and exit the function.
1092 // If loop does not result in returning false, we return true.
1093 __ Bind(&return_true);
1094 __ movl(esi, Immediate(1));
1095 __ jmp(&end);
1096
1097 // Return false and exit the function.
1098 __ Bind(&return_false);
1099 __ xorl(esi, esi);
1100 __ Bind(&end);
1101 }
1102
CreateStringIndexOfLocations(HInvoke * invoke,ArenaAllocator * allocator,bool start_at_zero)1103 static void CreateStringIndexOfLocations(HInvoke* invoke,
1104 ArenaAllocator* allocator,
1105 bool start_at_zero) {
1106 LocationSummary* locations = new (allocator) LocationSummary(invoke,
1107 LocationSummary::kCallOnSlowPath,
1108 kIntrinsified);
1109 // The data needs to be in EDI for scasw. So request that the string is there, anyways.
1110 locations->SetInAt(0, Location::RegisterLocation(EDI));
1111 // If we look for a constant char, we'll still have to copy it into EAX. So just request the
1112 // allocator to do that, anyways. We can still do the constant check by checking the parameter
1113 // of the instruction explicitly.
1114 // Note: This works as we don't clobber EAX anywhere.
1115 locations->SetInAt(1, Location::RegisterLocation(EAX));
1116 if (!start_at_zero) {
1117 locations->SetInAt(2, Location::RequiresRegister()); // The starting index.
1118 }
1119 // As we clobber EDI during execution anyways, also use it as the output.
1120 locations->SetOut(Location::SameAsFirstInput());
1121
1122 // repne scasw uses ECX as the counter.
1123 locations->AddTemp(Location::RegisterLocation(ECX));
1124 // Need another temporary to be able to compute the result.
1125 locations->AddTemp(Location::RequiresRegister());
1126 if (mirror::kUseStringCompression) {
1127 // Need another temporary to be able to save unflagged string length.
1128 locations->AddTemp(Location::RequiresRegister());
1129 }
1130 }
1131
GenerateStringIndexOf(HInvoke * invoke,X86Assembler * assembler,CodeGeneratorX86 * codegen,bool start_at_zero)1132 static void GenerateStringIndexOf(HInvoke* invoke,
1133 X86Assembler* assembler,
1134 CodeGeneratorX86* codegen,
1135 bool start_at_zero) {
1136 LocationSummary* locations = invoke->GetLocations();
1137
1138 // Note that the null check must have been done earlier.
1139 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1140
1141 Register string_obj = locations->InAt(0).AsRegister<Register>();
1142 Register search_value = locations->InAt(1).AsRegister<Register>();
1143 Register counter = locations->GetTemp(0).AsRegister<Register>();
1144 Register string_length = locations->GetTemp(1).AsRegister<Register>();
1145 Register out = locations->Out().AsRegister<Register>();
1146 // Only used when string compression feature is on.
1147 Register string_length_flagged;
1148
1149 // Check our assumptions for registers.
1150 DCHECK_EQ(string_obj, EDI);
1151 DCHECK_EQ(search_value, EAX);
1152 DCHECK_EQ(counter, ECX);
1153 DCHECK_EQ(out, EDI);
1154
1155 // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
1156 // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
1157 SlowPathCode* slow_path = nullptr;
1158 HInstruction* code_point = invoke->InputAt(1);
1159 if (code_point->IsIntConstant()) {
1160 if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
1161 std::numeric_limits<uint16_t>::max()) {
1162 // Always needs the slow-path. We could directly dispatch to it, but this case should be
1163 // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1164 slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
1165 codegen->AddSlowPath(slow_path);
1166 __ jmp(slow_path->GetEntryLabel());
1167 __ Bind(slow_path->GetExitLabel());
1168 return;
1169 }
1170 } else if (code_point->GetType() != DataType::Type::kUint16) {
1171 __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
1172 slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
1173 codegen->AddSlowPath(slow_path);
1174 __ j(kAbove, slow_path->GetEntryLabel());
1175 }
1176
1177 // From here down, we know that we are looking for a char that fits in 16 bits.
1178 // Location of reference to data array within the String object.
1179 int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1180 // Location of count within the String object.
1181 int32_t count_offset = mirror::String::CountOffset().Int32Value();
1182
1183 // Load the count field of the string containing the length and compression flag.
1184 __ movl(string_length, Address(string_obj, count_offset));
1185
1186 // Do a zero-length check. Even with string compression `count == 0` means empty.
1187 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1188 "Expecting 0=compressed, 1=uncompressed");
1189 // TODO: Support jecxz.
1190 NearLabel not_found_label;
1191 __ testl(string_length, string_length);
1192 __ j(kEqual, ¬_found_label);
1193
1194 if (mirror::kUseStringCompression) {
1195 string_length_flagged = locations->GetTemp(2).AsRegister<Register>();
1196 __ movl(string_length_flagged, string_length);
1197 // Extract the length and shift out the least significant bit used as compression flag.
1198 __ shrl(string_length, Immediate(1));
1199 }
1200
1201 if (start_at_zero) {
1202 // Number of chars to scan is the same as the string length.
1203 __ movl(counter, string_length);
1204
1205 // Move to the start of the string.
1206 __ addl(string_obj, Immediate(value_offset));
1207 } else {
1208 Register start_index = locations->InAt(2).AsRegister<Register>();
1209
1210 // Do a start_index check.
1211 __ cmpl(start_index, string_length);
1212 __ j(kGreaterEqual, ¬_found_label);
1213
1214 // Ensure we have a start index >= 0;
1215 __ xorl(counter, counter);
1216 __ cmpl(start_index, Immediate(0));
1217 __ cmovl(kGreater, counter, start_index);
1218
1219 if (mirror::kUseStringCompression) {
1220 NearLabel modify_counter, offset_uncompressed_label;
1221 __ testl(string_length_flagged, Immediate(1));
1222 __ j(kNotZero, &offset_uncompressed_label);
1223 // Move to the start of the string: string_obj + value_offset + start_index.
1224 __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_1, value_offset));
1225 __ jmp(&modify_counter);
1226
1227 // Move to the start of the string: string_obj + value_offset + 2 * start_index.
1228 __ Bind(&offset_uncompressed_label);
1229 __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1230
1231 // Now update ecx (the repne scasw work counter). We have string.length - start_index left to
1232 // compare.
1233 __ Bind(&modify_counter);
1234 } else {
1235 __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1236 }
1237 __ negl(counter);
1238 __ leal(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
1239 }
1240
1241 if (mirror::kUseStringCompression) {
1242 NearLabel uncompressed_string_comparison;
1243 NearLabel comparison_done;
1244 __ testl(string_length_flagged, Immediate(1));
1245 __ j(kNotZero, &uncompressed_string_comparison);
1246
1247 // Check if EAX (search_value) is ASCII.
1248 __ cmpl(search_value, Immediate(127));
1249 __ j(kGreater, ¬_found_label);
1250 // Comparing byte-per-byte.
1251 __ repne_scasb();
1252 __ jmp(&comparison_done);
1253
1254 // Everything is set up for repne scasw:
1255 // * Comparison address in EDI.
1256 // * Counter in ECX.
1257 __ Bind(&uncompressed_string_comparison);
1258 __ repne_scasw();
1259 __ Bind(&comparison_done);
1260 } else {
1261 __ repne_scasw();
1262 }
1263 // Did we find a match?
1264 __ j(kNotEqual, ¬_found_label);
1265
1266 // Yes, we matched. Compute the index of the result.
1267 __ subl(string_length, counter);
1268 __ leal(out, Address(string_length, -1));
1269
1270 NearLabel done;
1271 __ jmp(&done);
1272
1273 // Failed to match; return -1.
1274 __ Bind(¬_found_label);
1275 __ movl(out, Immediate(-1));
1276
1277 // And join up at the end.
1278 __ Bind(&done);
1279 if (slow_path != nullptr) {
1280 __ Bind(slow_path->GetExitLabel());
1281 }
1282 }
1283
VisitStringIndexOf(HInvoke * invoke)1284 void IntrinsicLocationsBuilderX86::VisitStringIndexOf(HInvoke* invoke) {
1285 CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ true);
1286 }
1287
VisitStringIndexOf(HInvoke * invoke)1288 void IntrinsicCodeGeneratorX86::VisitStringIndexOf(HInvoke* invoke) {
1289 GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ true);
1290 }
1291
VisitStringIndexOfAfter(HInvoke * invoke)1292 void IntrinsicLocationsBuilderX86::VisitStringIndexOfAfter(HInvoke* invoke) {
1293 CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ false);
1294 }
1295
VisitStringIndexOfAfter(HInvoke * invoke)1296 void IntrinsicCodeGeneratorX86::VisitStringIndexOfAfter(HInvoke* invoke) {
1297 GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false);
1298 }
1299
VisitStringNewStringFromBytes(HInvoke * invoke)1300 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
1301 LocationSummary* locations = new (allocator_) LocationSummary(
1302 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1303 InvokeRuntimeCallingConvention calling_convention;
1304 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1305 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1306 locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1307 locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
1308 locations->SetOut(Location::RegisterLocation(EAX));
1309 }
1310
VisitStringNewStringFromBytes(HInvoke * invoke)1311 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
1312 X86Assembler* assembler = GetAssembler();
1313 LocationSummary* locations = invoke->GetLocations();
1314
1315 Register byte_array = locations->InAt(0).AsRegister<Register>();
1316 __ testl(byte_array, byte_array);
1317 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
1318 codegen_->AddSlowPath(slow_path);
1319 __ j(kEqual, slow_path->GetEntryLabel());
1320
1321 codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc());
1322 CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1323 __ Bind(slow_path->GetExitLabel());
1324 }
1325
1326 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromChars(HInvoke* invoke) {
1327 LocationSummary* locations =
1328 new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1329 InvokeRuntimeCallingConvention calling_convention;
1330 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1331 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1332 locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1333 locations->SetOut(Location::RegisterLocation(EAX));
1334 }
1335
1336 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromChars(HInvoke* invoke) {
1337 // No need to emit code checking whether `locations->InAt(2)` is a null
1338 // pointer, as callers of the native method
1339 //
1340 // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1341 //
1342 // all include a null check on `data` before calling that method.
1343 codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
1344 CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1345 }
1346
1347 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromString(HInvoke* invoke) {
1348 LocationSummary* locations = new (allocator_) LocationSummary(
1349 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1350 InvokeRuntimeCallingConvention calling_convention;
1351 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1352 locations->SetOut(Location::RegisterLocation(EAX));
1353 }
1354
1355 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromString(HInvoke* invoke) {
1356 X86Assembler* assembler = GetAssembler();
1357 LocationSummary* locations = invoke->GetLocations();
1358
1359 Register string_to_copy = locations->InAt(0).AsRegister<Register>();
1360 __ testl(string_to_copy, string_to_copy);
1361 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
1362 codegen_->AddSlowPath(slow_path);
1363 __ j(kEqual, slow_path->GetEntryLabel());
1364
1365 codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc());
1366 CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1367 __ Bind(slow_path->GetExitLabel());
1368 }
1369
1370 void IntrinsicLocationsBuilderX86::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1371 // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1372 LocationSummary* locations =
1373 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1374 locations->SetInAt(0, Location::RequiresRegister());
1375 locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
1376 // Place srcEnd in ECX to save a move below.
1377 locations->SetInAt(2, Location::RegisterLocation(ECX));
1378 locations->SetInAt(3, Location::RequiresRegister());
1379 locations->SetInAt(4, Location::RequiresRegister());
1380
1381 // And we need some temporaries. We will use REP MOVSW, so we need fixed registers.
1382 // We don't have enough registers to also grab ECX, so handle below.
1383 locations->AddTemp(Location::RegisterLocation(ESI));
1384 locations->AddTemp(Location::RegisterLocation(EDI));
1385 }
1386
1387 void IntrinsicCodeGeneratorX86::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1388 X86Assembler* assembler = GetAssembler();
1389 LocationSummary* locations = invoke->GetLocations();
1390
1391 size_t char_component_size = DataType::Size(DataType::Type::kUint16);
1392 // Location of data in char array buffer.
1393 const uint32_t data_offset = mirror::Array::DataOffset(char_component_size).Uint32Value();
1394 // Location of char array data in string.
1395 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1396
1397 // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1398 Register obj = locations->InAt(0).AsRegister<Register>();
1399 Location srcBegin = locations->InAt(1);
1400 int srcBegin_value =
1401 srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0;
1402 Register srcEnd = locations->InAt(2).AsRegister<Register>();
1403 Register dst = locations->InAt(3).AsRegister<Register>();
1404 Register dstBegin = locations->InAt(4).AsRegister<Register>();
1405
1406 // Check assumption that sizeof(Char) is 2 (used in scaling below).
1407 const size_t char_size = DataType::Size(DataType::Type::kUint16);
1408 DCHECK_EQ(char_size, 2u);
1409
1410 // Compute the number of chars (words) to move.
1411 // Save ECX, since we don't know if it will be used later.
1412 __ pushl(ECX);
1413 int stack_adjust = kX86WordSize;
1414 __ cfi().AdjustCFAOffset(stack_adjust);
1415 DCHECK_EQ(srcEnd, ECX);
1416 if (srcBegin.IsConstant()) {
1417 __ subl(ECX, Immediate(srcBegin_value));
1418 } else {
1419 DCHECK(srcBegin.IsRegister());
1420 __ subl(ECX, srcBegin.AsRegister<Register>());
1421 }
1422
1423 NearLabel done;
1424 if (mirror::kUseStringCompression) {
1425 // Location of count in string
1426 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1427 const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
1428 DCHECK_EQ(c_char_size, 1u);
1429 __ pushl(EAX);
1430 __ cfi().AdjustCFAOffset(stack_adjust);
1431
1432 NearLabel copy_loop, copy_uncompressed;
1433 __ testl(Address(obj, count_offset), Immediate(1));
1434 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1435 "Expecting 0=compressed, 1=uncompressed");
1436 __ j(kNotZero, &copy_uncompressed);
1437 // Compute the address of the source string by adding the number of chars from
1438 // the source beginning to the value offset of a string.
1439 __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_1, value_offset));
1440
1441 // Start the loop to copy String's value to Array of Char.
1442 __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1443 __ Bind(&copy_loop);
1444 __ jecxz(&done);
1445 // Use EAX temporary (convert byte from ESI to word).
1446 // TODO: Use LODSB/STOSW (not supported by X86Assembler) with AH initialized to 0.
1447 __ movzxb(EAX, Address(ESI, 0));
1448 __ movw(Address(EDI, 0), EAX);
1449 __ leal(EDI, Address(EDI, char_size));
1450 __ leal(ESI, Address(ESI, c_char_size));
1451 // TODO: Add support for LOOP to X86Assembler.
1452 __ subl(ECX, Immediate(1));
1453 __ jmp(&copy_loop);
1454 __ Bind(&copy_uncompressed);
1455 }
1456
1457 // Do the copy for uncompressed string.
1458 // Compute the address of the destination buffer.
1459 __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1460 __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_2, value_offset));
1461 __ rep_movsw();
1462
1463 __ Bind(&done);
1464 if (mirror::kUseStringCompression) {
1465 // Restore EAX.
1466 __ popl(EAX);
1467 __ cfi().AdjustCFAOffset(-stack_adjust);
1468 }
1469 // Restore ECX.
1470 __ popl(ECX);
1471 __ cfi().AdjustCFAOffset(-stack_adjust);
1472 }
1473
1474 static void GenPeek(LocationSummary* locations, DataType::Type size, X86Assembler* assembler) {
1475 Register address = locations->InAt(0).AsRegisterPairLow<Register>();
1476 Location out_loc = locations->Out();
1477 // x86 allows unaligned access. We do not have to check the input or use specific instructions
1478 // to avoid a SIGBUS.
1479 switch (size) {
1480 case DataType::Type::kInt8:
1481 __ movsxb(out_loc.AsRegister<Register>(), Address(address, 0));
1482 break;
1483 case DataType::Type::kInt16:
1484 __ movsxw(out_loc.AsRegister<Register>(), Address(address, 0));
1485 break;
1486 case DataType::Type::kInt32:
1487 __ movl(out_loc.AsRegister<Register>(), Address(address, 0));
1488 break;
1489 case DataType::Type::kInt64:
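      // The 64-bit peek is done as two separate 32-bit loads (low word, then high word),
      // so it is not an atomic 64-bit access.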
1490 __ movl(out_loc.AsRegisterPairLow<Register>(), Address(address, 0));
1491 __ movl(out_loc.AsRegisterPairHigh<Register>(), Address(address, 4));
1492 break;
1493 default:
1494 LOG(FATAL) << "Type not recognized for peek: " << size;
1495 UNREACHABLE();
1496 }
1497 }
1498
1499 void IntrinsicLocationsBuilderX86::VisitMemoryPeekByte(HInvoke* invoke) {
1500 CreateLongToIntLocations(allocator_, invoke);
1501 }
1502
1503 void IntrinsicCodeGeneratorX86::VisitMemoryPeekByte(HInvoke* invoke) {
1504 GenPeek(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler());
1505 }
1506
1507 void IntrinsicLocationsBuilderX86::VisitMemoryPeekIntNative(HInvoke* invoke) {
1508 CreateLongToIntLocations(allocator_, invoke);
1509 }
1510
1511 void IntrinsicCodeGeneratorX86::VisitMemoryPeekIntNative(HInvoke* invoke) {
1512 GenPeek(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
1513 }
1514
1515 void IntrinsicLocationsBuilderX86::VisitMemoryPeekLongNative(HInvoke* invoke) {
1516 CreateLongToLongLocations(allocator_, invoke);
1517 }
1518
1519 void IntrinsicCodeGeneratorX86::VisitMemoryPeekLongNative(HInvoke* invoke) {
1520 GenPeek(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
1521 }
1522
1523 void IntrinsicLocationsBuilderX86::VisitMemoryPeekShortNative(HInvoke* invoke) {
1524 CreateLongToIntLocations(allocator_, invoke);
1525 }
1526
1527 void IntrinsicCodeGeneratorX86::VisitMemoryPeekShortNative(HInvoke* invoke) {
1528 GenPeek(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
1529 }
1530
1531 static void CreateLongIntToVoidLocations(ArenaAllocator* allocator,
1532 DataType::Type size,
1533 HInvoke* invoke) {
1534 LocationSummary* locations =
1535 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1536 locations->SetInAt(0, Location::RequiresRegister());
1537 HInstruction* value = invoke->InputAt(1);
1538 if (size == DataType::Type::kInt8) {
1539 locations->SetInAt(1, Location::ByteRegisterOrConstant(EDX, value));
1540 } else {
1541 locations->SetInAt(1, Location::RegisterOrConstant(value));
1542 }
1543 }
1544
1545 static void GenPoke(LocationSummary* locations, DataType::Type size, X86Assembler* assembler) {
1546 Register address = locations->InAt(0).AsRegisterPairLow<Register>();
1547 Location value_loc = locations->InAt(1);
1548 // x86 allows unaligned access. We do not have to check the input or use specific instructions
1549 // to avoid a SIGBUS.
1550 switch (size) {
1551 case DataType::Type::kInt8:
1552 if (value_loc.IsConstant()) {
1553 __ movb(Address(address, 0),
1554 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1555 } else {
1556 __ movb(Address(address, 0), value_loc.AsRegister<ByteRegister>());
1557 }
1558 break;
1559 case DataType::Type::kInt16:
1560 if (value_loc.IsConstant()) {
1561 __ movw(Address(address, 0),
1562 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1563 } else {
1564 __ movw(Address(address, 0), value_loc.AsRegister<Register>());
1565 }
1566 break;
1567 case DataType::Type::kInt32:
1568 if (value_loc.IsConstant()) {
1569 __ movl(Address(address, 0),
1570 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1571 } else {
1572 __ movl(Address(address, 0), value_loc.AsRegister<Register>());
1573 }
1574 break;
1575 case DataType::Type::kInt64:
1576 if (value_loc.IsConstant()) {
1577 int64_t value = value_loc.GetConstant()->AsLongConstant()->GetValue();
1578 __ movl(Address(address, 0), Immediate(Low32Bits(value)));
1579 __ movl(Address(address, 4), Immediate(High32Bits(value)));
1580 } else {
1581 __ movl(Address(address, 0), value_loc.AsRegisterPairLow<Register>());
1582 __ movl(Address(address, 4), value_loc.AsRegisterPairHigh<Register>());
1583 }
1584 break;
1585 default:
1586 LOG(FATAL) << "Type not recognized for poke: " << size;
1587 UNREACHABLE();
1588 }
1589 }
1590
1591 void IntrinsicLocationsBuilderX86::VisitMemoryPokeByte(HInvoke* invoke) {
1592 CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt8, invoke);
1593 }
1594
1595 void IntrinsicCodeGeneratorX86::VisitMemoryPokeByte(HInvoke* invoke) {
1596 GenPoke(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler());
1597 }
1598
1599 void IntrinsicLocationsBuilderX86::VisitMemoryPokeIntNative(HInvoke* invoke) {
1600 CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt32, invoke);
1601 }
1602
1603 void IntrinsicCodeGeneratorX86::VisitMemoryPokeIntNative(HInvoke* invoke) {
1604 GenPoke(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
1605 }
1606
1607 void IntrinsicLocationsBuilderX86::VisitMemoryPokeLongNative(HInvoke* invoke) {
1608 CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt64, invoke);
1609 }
1610
1611 void IntrinsicCodeGeneratorX86::VisitMemoryPokeLongNative(HInvoke* invoke) {
1612 GenPoke(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
1613 }
1614
1615 void IntrinsicLocationsBuilderX86::VisitMemoryPokeShortNative(HInvoke* invoke) {
1616 CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt16, invoke);
1617 }
1618
1619 void IntrinsicCodeGeneratorX86::VisitMemoryPokeShortNative(HInvoke* invoke) {
1620 GenPoke(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
1621 }
1622
1623 void IntrinsicLocationsBuilderX86::VisitThreadCurrentThread(HInvoke* invoke) {
1624 LocationSummary* locations =
1625 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1626 locations->SetOut(Location::RequiresRegister());
1627 }
1628
1629 void IntrinsicCodeGeneratorX86::VisitThreadCurrentThread(HInvoke* invoke) {
1630 Register out = invoke->GetLocations()->Out().AsRegister<Register>();
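  // The runtime Thread is addressed through the FS segment on x86; PeerOffset is the
  // offset of the java.lang.Thread peer object within it, so a single segment-relative
  // load yields the result of Thread.currentThread().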
1631 GetAssembler()->fs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86PointerSize>()));
1632 }
1633
1634 static void GenUnsafeGet(HInvoke* invoke,
1635 DataType::Type type,
1636 bool is_volatile,
1637 CodeGeneratorX86* codegen) {
1638 X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
1639 LocationSummary* locations = invoke->GetLocations();
1640 Location base_loc = locations->InAt(1);
1641 Register base = base_loc.AsRegister<Register>();
1642 Location offset_loc = locations->InAt(2);
1643 Register offset = offset_loc.AsRegisterPairLow<Register>();
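  // The Unsafe offset argument is a Java long, but on 32-bit x86 only the low word is
  // needed to form the address, so just the low register of the pair is used.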
1644 Location output_loc = locations->Out();
1645
1646 switch (type) {
1647 case DataType::Type::kInt32: {
1648 Register output = output_loc.AsRegister<Register>();
1649 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1650 break;
1651 }
1652
1653 case DataType::Type::kReference: {
1654 Register output = output_loc.AsRegister<Register>();
1655 if (kEmitCompilerReadBarrier) {
1656 if (kUseBakerReadBarrier) {
1657 Address src(base, offset, ScaleFactor::TIMES_1, 0);
1658 codegen->GenerateReferenceLoadWithBakerReadBarrier(
1659 invoke, output_loc, base, src, /* needs_null_check= */ false);
1660 } else {
1661 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1662 codegen->GenerateReadBarrierSlow(
1663 invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
1664 }
1665 } else {
1666 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1667 __ MaybeUnpoisonHeapReference(output);
1668 }
1669 break;
1670 }
1671
1672 case DataType::Type::kInt64: {
1673 Register output_lo = output_loc.AsRegisterPairLow<Register>();
1674 Register output_hi = output_loc.AsRegisterPairHigh<Register>();
1675 if (is_volatile) {
1676 // Need to use a XMM to read atomically.
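        // movsd performs a single 8-byte load; movd then extracts the low 32 bits and,
        // after shifting the XMM value right by 32, the high 32 bits.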
1677 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
1678 __ movsd(temp, Address(base, offset, ScaleFactor::TIMES_1, 0));
1679 __ movd(output_lo, temp);
1680 __ psrlq(temp, Immediate(32));
1681 __ movd(output_hi, temp);
1682 } else {
1683 __ movl(output_lo, Address(base, offset, ScaleFactor::TIMES_1, 0));
1684 __ movl(output_hi, Address(base, offset, ScaleFactor::TIMES_1, 4));
1685 }
1686 }
1687 break;
1688
1689 default:
1690 LOG(FATAL) << "Unsupported op size " << type;
1691 UNREACHABLE();
1692 }
1693 }
1694
1695 static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator,
1696 HInvoke* invoke,
1697 DataType::Type type,
1698 bool is_volatile) {
1699 bool can_call = kEmitCompilerReadBarrier &&
1700 (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
1701 invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
1702 LocationSummary* locations =
1703 new (allocator) LocationSummary(invoke,
1704 can_call
1705 ? LocationSummary::kCallOnSlowPath
1706 : LocationSummary::kNoCall,
1707 kIntrinsified);
1708 if (can_call && kUseBakerReadBarrier) {
1709 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
1710 }
1711 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
1712 locations->SetInAt(1, Location::RequiresRegister());
1713 locations->SetInAt(2, Location::RequiresRegister());
1714 if (type == DataType::Type::kInt64) {
1715 if (is_volatile) {
1716 // Need to use XMM to read volatile.
1717 locations->AddTemp(Location::RequiresFpuRegister());
1718 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
1719 } else {
1720 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
1721 }
1722 } else {
1723 locations->SetOut(Location::RequiresRegister(),
1724 (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
1725 }
1726 }
1727
1728 void IntrinsicLocationsBuilderX86::VisitUnsafeGet(HInvoke* invoke) {
1729 CreateIntIntIntToIntLocations(
1730 allocator_, invoke, DataType::Type::kInt32, /* is_volatile= */ false);
1731 }
1732 void IntrinsicLocationsBuilderX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
1733 CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt32, /* is_volatile= */ true);
1734 }
1735 void IntrinsicLocationsBuilderX86::VisitUnsafeGetLong(HInvoke* invoke) {
1736 CreateIntIntIntToIntLocations(
1737 allocator_, invoke, DataType::Type::kInt64, /* is_volatile= */ false);
1738 }
1739 void IntrinsicLocationsBuilderX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
1740 CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt64, /* is_volatile= */ true);
1741 }
1742 void IntrinsicLocationsBuilderX86::VisitUnsafeGetObject(HInvoke* invoke) {
1743 CreateIntIntIntToIntLocations(
1744 allocator_, invoke, DataType::Type::kReference, /* is_volatile= */ false);
1745 }
1746 void IntrinsicLocationsBuilderX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1747 CreateIntIntIntToIntLocations(
1748 allocator_, invoke, DataType::Type::kReference, /* is_volatile= */ true);
1749 }
1750
1751
1752 void IntrinsicCodeGeneratorX86::VisitUnsafeGet(HInvoke* invoke) {
1753 GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ false, codegen_);
1754 }
1755 void IntrinsicCodeGeneratorX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
1756 GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ true, codegen_);
1757 }
1758 void IntrinsicCodeGeneratorX86::VisitUnsafeGetLong(HInvoke* invoke) {
1759 GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ false, codegen_);
1760 }
1761 void IntrinsicCodeGeneratorX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
1762 GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ true, codegen_);
1763 }
1764 void IntrinsicCodeGeneratorX86::VisitUnsafeGetObject(HInvoke* invoke) {
1765 GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ false, codegen_);
1766 }
1767 void IntrinsicCodeGeneratorX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1768 GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ true, codegen_);
1769 }
1770
1771
1772 static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* allocator,
1773 DataType::Type type,
1774 HInvoke* invoke,
1775 bool is_volatile) {
1776 LocationSummary* locations =
1777 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1778 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
1779 locations->SetInAt(1, Location::RequiresRegister());
1780 locations->SetInAt(2, Location::RequiresRegister());
1781 locations->SetInAt(3, Location::RequiresRegister());
1782 if (type == DataType::Type::kReference) {
1783 // Need temp registers for card-marking.
1784 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
1785 // Ensure the value is in a byte register.
1786 locations->AddTemp(Location::RegisterLocation(ECX));
1787 } else if (type == DataType::Type::kInt64 && is_volatile) {
1788 locations->AddTemp(Location::RequiresFpuRegister());
1789 locations->AddTemp(Location::RequiresFpuRegister());
1790 }
1791 }
1792
1793 void IntrinsicLocationsBuilderX86::VisitUnsafePut(HInvoke* invoke) {
1794 CreateIntIntIntIntToVoidPlusTempsLocations(
1795 allocator_, DataType::Type::kInt32, invoke, /* is_volatile= */ false);
1796 }
1797 void IntrinsicLocationsBuilderX86::VisitUnsafePutOrdered(HInvoke* invoke) {
1798 CreateIntIntIntIntToVoidPlusTempsLocations(
1799 allocator_, DataType::Type::kInt32, invoke, /* is_volatile= */ false);
1800 }
1801 void IntrinsicLocationsBuilderX86::VisitUnsafePutVolatile(HInvoke* invoke) {
1802 CreateIntIntIntIntToVoidPlusTempsLocations(
1803 allocator_, DataType::Type::kInt32, invoke, /* is_volatile= */ true);
1804 }
1805 void IntrinsicLocationsBuilderX86::VisitUnsafePutObject(HInvoke* invoke) {
1806 CreateIntIntIntIntToVoidPlusTempsLocations(
1807 allocator_, DataType::Type::kReference, invoke, /* is_volatile= */ false);
1808 }
1809 void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
1810 CreateIntIntIntIntToVoidPlusTempsLocations(
1811 allocator_, DataType::Type::kReference, invoke, /* is_volatile= */ false);
1812 }
1813 void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
1814 CreateIntIntIntIntToVoidPlusTempsLocations(
1815 allocator_, DataType::Type::kReference, invoke, /* is_volatile= */ true);
1816 }
1817 void IntrinsicLocationsBuilderX86::VisitUnsafePutLong(HInvoke* invoke) {
1818 CreateIntIntIntIntToVoidPlusTempsLocations(
1819 allocator_, DataType::Type::kInt64, invoke, /* is_volatile= */ false);
1820 }
1821 void IntrinsicLocationsBuilderX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
1822 CreateIntIntIntIntToVoidPlusTempsLocations(
1823 allocator_, DataType::Type::kInt64, invoke, /* is_volatile= */ false);
1824 }
1825 void IntrinsicLocationsBuilderX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
1826 CreateIntIntIntIntToVoidPlusTempsLocations(
1827 allocator_, DataType::Type::kInt64, invoke, /* is_volatile= */ true);
1828 }
1829
1830 // We don't care for ordered: it requires an AnyStore barrier, which is already given by the x86
1831 // memory model.
1832 static void GenUnsafePut(LocationSummary* locations,
1833 DataType::Type type,
1834 bool is_volatile,
1835 CodeGeneratorX86* codegen) {
1836 X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
1837 Register base = locations->InAt(1).AsRegister<Register>();
1838 Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
1839 Location value_loc = locations->InAt(3);
1840
1841 if (type == DataType::Type::kInt64) {
1842 Register value_lo = value_loc.AsRegisterPairLow<Register>();
1843 Register value_hi = value_loc.AsRegisterPairHigh<Register>();
1844 if (is_volatile) {
1845 XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
1846 XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
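      // punpckldq interleaves the low dwords, leaving temp1 = value_hi:value_lo, so the
      // movsd below writes the whole 64-bit value in a single 8-byte store (the store-side
      // counterpart of the atomic XMM read in GenUnsafeGet).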
1847 __ movd(temp1, value_lo);
1848 __ movd(temp2, value_hi);
1849 __ punpckldq(temp1, temp2);
1850 __ movsd(Address(base, offset, ScaleFactor::TIMES_1, 0), temp1);
1851 } else {
1852 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_lo);
1853 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 4), value_hi);
1854 }
1855 } else if (kPoisonHeapReferences && type == DataType::Type::kReference) {
1856 Register temp = locations->GetTemp(0).AsRegister<Register>();
1857 __ movl(temp, value_loc.AsRegister<Register>());
1858 __ PoisonHeapReference(temp);
1859 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), temp);
1860 } else {
1861 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_loc.AsRegister<Register>());
1862 }
1863
1864 if (is_volatile) {
1865 codegen->MemoryFence();
1866 }
1867
1868 if (type == DataType::Type::kReference) {
1869 bool value_can_be_null = true; // TODO: Worth finding out this information?
1870 codegen->MarkGCCard(locations->GetTemp(0).AsRegister<Register>(),
1871 locations->GetTemp(1).AsRegister<Register>(),
1872 base,
1873 value_loc.AsRegister<Register>(),
1874 value_can_be_null);
1875 }
1876 }
1877
1878 void IntrinsicCodeGeneratorX86::VisitUnsafePut(HInvoke* invoke) {
1879 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ false, codegen_);
1880 }
1881 void IntrinsicCodeGeneratorX86::VisitUnsafePutOrdered(HInvoke* invoke) {
1882 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ false, codegen_);
1883 }
1884 void IntrinsicCodeGeneratorX86::VisitUnsafePutVolatile(HInvoke* invoke) {
1885 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ true, codegen_);
1886 }
1887 void IntrinsicCodeGeneratorX86::VisitUnsafePutObject(HInvoke* invoke) {
1888 GenUnsafePut(
1889 invoke->GetLocations(), DataType::Type::kReference, /* is_volatile= */ false, codegen_);
1890 }
1891 void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
1892 GenUnsafePut(
1893 invoke->GetLocations(), DataType::Type::kReference, /* is_volatile= */ false, codegen_);
1894 }
1895 void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
1896 GenUnsafePut(
1897 invoke->GetLocations(), DataType::Type::kReference, /* is_volatile= */ true, codegen_);
1898 }
1899 void IntrinsicCodeGeneratorX86::VisitUnsafePutLong(HInvoke* invoke) {
1900 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile= */ false, codegen_);
1901 }
1902 void IntrinsicCodeGeneratorX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
1903 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile= */ false, codegen_);
1904 }
1905 void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
1906 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile= */ true, codegen_);
1907 }
1908
1909 static void CreateIntIntIntIntIntToInt(ArenaAllocator* allocator,
1910 DataType::Type type,
1911 HInvoke* invoke) {
1912 bool can_call = kEmitCompilerReadBarrier &&
1913 kUseBakerReadBarrier &&
1914 (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
1915 LocationSummary* locations =
1916 new (allocator) LocationSummary(invoke,
1917 can_call
1918 ? LocationSummary::kCallOnSlowPath
1919 : LocationSummary::kNoCall,
1920 kIntrinsified);
1921 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
1922 locations->SetInAt(1, Location::RequiresRegister());
1923 // Offset is a long, but in 32 bit mode, we only need the low word.
1924 // Can we update the invoke here to remove a TypeConvert to Long?
1925 locations->SetInAt(2, Location::RequiresRegister());
1926 // Expected value must be in EAX or EDX:EAX.
1927 // For long, new value must be in ECX:EBX.
1928 if (type == DataType::Type::kInt64) {
1929 locations->SetInAt(3, Location::RegisterPairLocation(EAX, EDX));
1930 locations->SetInAt(4, Location::RegisterPairLocation(EBX, ECX));
1931 } else {
1932 locations->SetInAt(3, Location::RegisterLocation(EAX));
1933 locations->SetInAt(4, Location::RequiresRegister());
1934 }
1935
1936 // Force a byte register for the output.
1937 locations->SetOut(Location::RegisterLocation(EAX));
1938 if (type == DataType::Type::kReference) {
1939 // Need temporary registers for card-marking, and possibly for
1940 // (Baker) read barrier.
1941 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
1942 // Need a byte register for marking.
1943 locations->AddTemp(Location::RegisterLocation(ECX));
1944 }
1945 }
1946
1947 void IntrinsicLocationsBuilderX86::VisitUnsafeCASInt(HInvoke* invoke) {
1948 CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kInt32, invoke);
1949 }
1950
1951 void IntrinsicLocationsBuilderX86::VisitUnsafeCASLong(HInvoke* invoke) {
1952 CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kInt64, invoke);
1953 }
1954
1955 void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) {
1956 // The only read barrier implementation supporting the
1957 // UnsafeCASObject intrinsic is the Baker-style read barriers.
1958 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
1959 return;
1960 }
1961
1962 CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kReference, invoke);
1963 }
1964
1965 static void GenPrimitiveLockedCmpxchg(DataType::Type type,
1966 CodeGeneratorX86* codegen,
1967 Location expected_value,
1968 Location new_value,
1969 Register base,
1970 Register offset,
1971 // Only necessary for floating point
1972 Register temp = Register::kNoRegister) {
1973 X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
1974
1975 if (DataType::Kind(type) == DataType::Type::kInt32) {
1976 DCHECK_EQ(expected_value.AsRegister<Register>(), EAX);
1977 }
1978
1979 // The address of the field within the holding object.
1980 Address field_addr(base, offset, TIMES_1, 0);
1981
1982 switch (type) {
1983 case DataType::Type::kBool:
1984 case DataType::Type::kInt8:
1985 __ LockCmpxchgb(field_addr, new_value.AsRegister<ByteRegister>());
1986 break;
1987 case DataType::Type::kInt16:
1988 case DataType::Type::kUint16:
1989 __ LockCmpxchgw(field_addr, new_value.AsRegister<Register>());
1990 break;
1991 case DataType::Type::kInt32:
1992 __ LockCmpxchgl(field_addr, new_value.AsRegister<Register>());
1993 break;
1994 case DataType::Type::kFloat32: {
1995 // cmpxchg requires the expected value to be in EAX so the new value must be elsewhere.
1996 DCHECK_NE(temp, EAX);
1997 // EAX is both an input and an output for cmpxchg
1998 codegen->Move32(Location::RegisterLocation(EAX), expected_value);
1999 codegen->Move32(Location::RegisterLocation(temp), new_value);
2000 __ LockCmpxchgl(field_addr, temp);
2001 break;
2002 }
2003 case DataType::Type::kInt64:
2004 // Ensure the expected value is in EAX:EDX and that the new
2005 // value is in EBX:ECX (required by the CMPXCHG8B instruction).
2006 DCHECK_EQ(expected_value.AsRegisterPairLow<Register>(), EAX);
2007 DCHECK_EQ(expected_value.AsRegisterPairHigh<Register>(), EDX);
2008 DCHECK_EQ(new_value.AsRegisterPairLow<Register>(), EBX);
2009 DCHECK_EQ(new_value.AsRegisterPairHigh<Register>(), ECX);
2010 __ LockCmpxchg8b(field_addr);
2011 break;
2012 default:
2013 LOG(FATAL) << "Unexpected CAS type " << type;
2014 }
2015 // LOCK CMPXCHG/LOCK CMPXCHG8B have full barrier semantics, and we
2016 // don't need scheduling barriers at this time.
2017 }
2018
2019 static void GenPrimitiveCAS(DataType::Type type,
2020 CodeGeneratorX86* codegen,
2021 Location expected_value,
2022 Location new_value,
2023 Register base,
2024 Register offset,
2025 Location out,
2026 // Only necessary for floating point
2027 Register temp = Register::kNoRegister,
2028 bool is_cmpxchg = false) {
2029 X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
2030
2031 if (!is_cmpxchg || DataType::Kind(type) == DataType::Type::kInt32) {
2032 DCHECK_EQ(out.AsRegister<Register>(), EAX);
2033 }
2034
2035 GenPrimitiveLockedCmpxchg(type, codegen, expected_value, new_value, base, offset, temp);
2036
2037 if (is_cmpxchg) {
2038 // Sign-extend, zero-extend or move the result if necessary
2039 switch (type) {
2040 case DataType::Type::kBool:
2041 __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
2042 break;
2043 case DataType::Type::kInt8:
2044 __ movsxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
2045 break;
2046 case DataType::Type::kInt16:
2047 __ movsxw(out.AsRegister<Register>(), out.AsRegister<Register>());
2048 break;
2049 case DataType::Type::kUint16:
2050 __ movzxw(out.AsRegister<Register>(), out.AsRegister<Register>());
2051 break;
2052 case DataType::Type::kFloat32:
2053 __ movd(out.AsFpuRegister<XmmRegister>(), EAX);
2054 break;
2055 default:
2056 // Nothing to do
2057 break;
2058 }
2059 } else {
2060 // Convert ZF into the Boolean result.
2061 __ setb(kZero, out.AsRegister<Register>());
2062 __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
2063 }
2064 }
2065
2066 static void GenReferenceCAS(HInvoke* invoke,
2067 CodeGeneratorX86* codegen,
2068 Location expected_value,
2069 Location new_value,
2070 Register base,
2071 Register offset,
2072 Register temp,
2073 Register temp2,
2074 bool is_cmpxchg = false) {
2075 X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
2076 LocationSummary* locations = invoke->GetLocations();
2077 Location out = locations->Out();
2078
2079 // The address of the field within the holding object.
2080 Address field_addr(base, offset, TIMES_1, 0);
2081
2082 Register value = new_value.AsRegister<Register>();
2083 Register expected = expected_value.AsRegister<Register>();
2084 DCHECK_EQ(expected, EAX);
2085 DCHECK_NE(temp, temp2);
2086
2087 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2088 // Need to make sure the reference stored in the field is a to-space
2089 // one before attempting the CAS or the CAS could fail incorrectly.
2090 codegen->GenerateReferenceLoadWithBakerReadBarrier(
2091 invoke,
2092 // Unused, used only as a "temporary" within the read barrier.
2093 Location::RegisterLocation(temp),
2094 base,
2095 field_addr,
2096 /* needs_null_check= */ false,
2097 /* always_update_field= */ true,
2098 &temp2);
2099 }
2100 bool base_equals_value = (base == value);
2101 if (kPoisonHeapReferences) {
2102 if (base_equals_value) {
2103 // If `base` and `value` are the same register location, move
2104 // `value` to a temporary register. This way, poisoning
2105 // `value` won't invalidate `base`.
2106 value = temp;
2107 __ movl(value, base);
2108 }
2109
2110 // Check that the register allocator did not assign the location
2111 // of `expected` (EAX) to `value` nor to `base`, so that heap
2112 // poisoning (when enabled) works as intended below.
2113 // - If `value` were equal to `expected`, both references would
2114 // be poisoned twice, meaning they would not be poisoned at
2115 // all, as heap poisoning uses address negation.
2116 // - If `base` were equal to `expected`, poisoning `expected`
2117 // would invalidate `base`.
2118 DCHECK_NE(value, expected);
2119 DCHECK_NE(base, expected);
2120 __ PoisonHeapReference(expected);
2121 __ PoisonHeapReference(value);
2122 }
2123 __ LockCmpxchgl(field_addr, value);
2124
2125 // LOCK CMPXCHG has full barrier semantics, and we don't need
2126 // scheduling barriers at this time.
2127
2128 if (is_cmpxchg) {
2129 DCHECK_EQ(out.AsRegister<Register>(), EAX);
2130 __ MaybeUnpoisonHeapReference(out.AsRegister<Register>());
2131 } else {
2132 // Convert ZF into the Boolean result.
2133 __ setb(kZero, out.AsRegister<Register>());
2134 __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
2135 }
2136
2137 // Mark card for object if the new value is stored.
2138 bool value_can_be_null = true; // TODO: Worth finding out this information?
2139 NearLabel skip_mark_gc_card;
2140 __ j(kNotZero, &skip_mark_gc_card);
2141 codegen->MarkGCCard(temp, temp2, base, value, value_can_be_null);
2142 __ Bind(&skip_mark_gc_card);
2143
2144 // If heap poisoning is enabled, we need to unpoison the values
2145 // that were poisoned earlier.
2146 if (kPoisonHeapReferences) {
2147 if (base_equals_value) {
2148 // `value` has been moved to a temporary register, no need to
2149 // unpoison it.
2150 } else {
2151 // Ensure `value` is different from `out`, so that unpoisoning
2152 // the former does not invalidate the latter.
2153 DCHECK_NE(value, out.AsRegister<Register>());
2154 __ UnpoisonHeapReference(value);
2155 }
2156 }
2157 // Do not unpoison the reference contained in register
2158 // `expected`, as it is the same as register `out` (EAX).
2159 }
2160
2161 static void GenCAS(DataType::Type type, HInvoke* invoke, CodeGeneratorX86* codegen) {
2162 LocationSummary* locations = invoke->GetLocations();
2163
2164 Register base = locations->InAt(1).AsRegister<Register>();
2165 Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
2166 Location expected_value = locations->InAt(3);
2167 Location new_value = locations->InAt(4);
2168 Location out = locations->Out();
2169 DCHECK_EQ(out.AsRegister<Register>(), EAX);
2170
2171 if (type == DataType::Type::kReference) {
2172 // The only read barrier implementation supporting the
2173 // UnsafeCASObject intrinsic is the Baker-style read barriers.
2174 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
2175
2176 Register temp = locations->GetTemp(0).AsRegister<Register>();
2177 Register temp2 = locations->GetTemp(1).AsRegister<Register>();
2178 GenReferenceCAS(invoke, codegen, expected_value, new_value, base, offset, temp, temp2);
2179 } else {
2180 DCHECK(!DataType::IsFloatingPointType(type));
2181 GenPrimitiveCAS(type, codegen, expected_value, new_value, base, offset, out);
2182 }
2183 }
2184
2185 void IntrinsicCodeGeneratorX86::VisitUnsafeCASInt(HInvoke* invoke) {
2186 GenCAS(DataType::Type::kInt32, invoke, codegen_);
2187 }
2188
2189 void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) {
2190 GenCAS(DataType::Type::kInt64, invoke, codegen_);
2191 }
2192
2193 void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) {
2194 // The only read barrier implementation supporting the
2195 // UnsafeCASObject intrinsic is the Baker-style read barriers.
2196 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
2197
2198 GenCAS(DataType::Type::kReference, invoke, codegen_);
2199 }
2200
2201 void IntrinsicLocationsBuilderX86::VisitIntegerReverse(HInvoke* invoke) {
2202 LocationSummary* locations =
2203 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2204 locations->SetInAt(0, Location::RequiresRegister());
2205 locations->SetOut(Location::SameAsFirstInput());
2206 locations->AddTemp(Location::RequiresRegister());
2207 }
2208
2209 static void SwapBits(Register reg, Register temp, int32_t shift, int32_t mask,
2210 X86Assembler* assembler) {
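  // Computes reg = ((reg >> shift) & mask) | ((reg & mask) << shift), i.e. swaps each
  // group of bits selected by `mask` with the neighbouring group `shift` positions higher.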
2211 Immediate imm_shift(shift);
2212 Immediate imm_mask(mask);
2213 __ movl(temp, reg);
2214 __ shrl(reg, imm_shift);
2215 __ andl(temp, imm_mask);
2216 __ andl(reg, imm_mask);
2217 __ shll(temp, imm_shift);
2218 __ orl(reg, temp);
2219 }
2220
2221 void IntrinsicCodeGeneratorX86::VisitIntegerReverse(HInvoke* invoke) {
2222 X86Assembler* assembler = GetAssembler();
2223 LocationSummary* locations = invoke->GetLocations();
2224
2225 Register reg = locations->InAt(0).AsRegister<Register>();
2226 Register temp = locations->GetTemp(0).AsRegister<Register>();
2227
2228 /*
2229 * Use one bswap instruction to reverse byte order first and then use 3 rounds of
2230 * swapping bits to reverse bits in a number x. Using bswap to save instructions
2231 * compared to generic luni implementation which has 5 rounds of swapping bits.
2232 * x = bswap x
2233 * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
2234 * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
2235 * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
2236 */
2237 __ bswapl(reg);
2238 SwapBits(reg, temp, 1, 0x55555555, assembler);
2239 SwapBits(reg, temp, 2, 0x33333333, assembler);
2240 SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
2241 }
2242
2243 void IntrinsicLocationsBuilderX86::VisitLongReverse(HInvoke* invoke) {
2244 LocationSummary* locations =
2245 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2246 locations->SetInAt(0, Location::RequiresRegister());
2247 locations->SetOut(Location::SameAsFirstInput());
2248 locations->AddTemp(Location::RequiresRegister());
2249 }
2250
2251 void IntrinsicCodeGeneratorX86::VisitLongReverse(HInvoke* invoke) {
2252 X86Assembler* assembler = GetAssembler();
2253 LocationSummary* locations = invoke->GetLocations();
2254
2255 Register reg_low = locations->InAt(0).AsRegisterPairLow<Register>();
2256 Register reg_high = locations->InAt(0).AsRegisterPairHigh<Register>();
2257 Register temp = locations->GetTemp(0).AsRegister<Register>();
2258
2259 // We want to swap high/low, then bswap each one, and then do the same
2260 // as a 32 bit reverse.
2261 // Exchange high and low.
2262 __ movl(temp, reg_low);
2263 __ movl(reg_low, reg_high);
2264 __ movl(reg_high, temp);
2265
2266 // bit-reverse low
2267 __ bswapl(reg_low);
2268 SwapBits(reg_low, temp, 1, 0x55555555, assembler);
2269 SwapBits(reg_low, temp, 2, 0x33333333, assembler);
2270 SwapBits(reg_low, temp, 4, 0x0f0f0f0f, assembler);
2271
2272 // bit-reverse high
2273 __ bswapl(reg_high);
2274 SwapBits(reg_high, temp, 1, 0x55555555, assembler);
2275 SwapBits(reg_high, temp, 2, 0x33333333, assembler);
2276 SwapBits(reg_high, temp, 4, 0x0f0f0f0f, assembler);
2277 }
2278
2279 static void CreateBitCountLocations(
2280 ArenaAllocator* allocator, CodeGeneratorX86* codegen, HInvoke* invoke, bool is_long) {
2281 if (!codegen->GetInstructionSetFeatures().HasPopCnt()) {
2282 // Do nothing if there is no popcnt support. This results in generating
2283 // a call for the intrinsic rather than direct code.
2284 return;
2285 }
2286 LocationSummary* locations =
2287 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2288 if (is_long) {
2289 locations->AddTemp(Location::RequiresRegister());
2290 }
2291 locations->SetInAt(0, Location::Any());
2292 locations->SetOut(Location::RequiresRegister());
2293 }
2294
2295 static void GenBitCount(X86Assembler* assembler,
2296 CodeGeneratorX86* codegen,
2297 HInvoke* invoke, bool is_long) {
2298 LocationSummary* locations = invoke->GetLocations();
2299 Location src = locations->InAt(0);
2300 Register out = locations->Out().AsRegister<Register>();
2301
2302 if (invoke->InputAt(0)->IsConstant()) {
2303 // Evaluate this at compile time.
2304 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2305 int32_t result = is_long
2306 ? POPCOUNT(static_cast<uint64_t>(value))
2307 : POPCOUNT(static_cast<uint32_t>(value));
2308 codegen->Load32BitValue(out, result);
2309 return;
2310 }
2311
2312 // Handle the non-constant cases.
2313 if (!is_long) {
2314 if (src.IsRegister()) {
2315 __ popcntl(out, src.AsRegister<Register>());
2316 } else {
2317 DCHECK(src.IsStackSlot());
2318 __ popcntl(out, Address(ESP, src.GetStackIndex()));
2319 }
2320 } else {
2321 // The 64-bit case needs to worry about two parts.
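    // popcount of a 64-bit value is just popcount(low word) + popcount(high word).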
2322 Register temp = locations->GetTemp(0).AsRegister<Register>();
2323 if (src.IsRegisterPair()) {
2324 __ popcntl(temp, src.AsRegisterPairLow<Register>());
2325 __ popcntl(out, src.AsRegisterPairHigh<Register>());
2326 } else {
2327 DCHECK(src.IsDoubleStackSlot());
2328 __ popcntl(temp, Address(ESP, src.GetStackIndex()));
2329 __ popcntl(out, Address(ESP, src.GetHighStackIndex(kX86WordSize)));
2330 }
2331 __ addl(out, temp);
2332 }
2333 }
2334
2335 void IntrinsicLocationsBuilderX86::VisitIntegerBitCount(HInvoke* invoke) {
2336 CreateBitCountLocations(allocator_, codegen_, invoke, /* is_long= */ false);
2337 }
2338
2339 void IntrinsicCodeGeneratorX86::VisitIntegerBitCount(HInvoke* invoke) {
2340 GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ false);
2341 }
2342
2343 void IntrinsicLocationsBuilderX86::VisitLongBitCount(HInvoke* invoke) {
2344 CreateBitCountLocations(allocator_, codegen_, invoke, /* is_long= */ true);
2345 }
2346
2347 void IntrinsicCodeGeneratorX86::VisitLongBitCount(HInvoke* invoke) {
2348 GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ true);
2349 }
2350
2351 static void CreateLeadingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_long) {
2352 LocationSummary* locations =
2353 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2354 if (is_long) {
2355 locations->SetInAt(0, Location::RequiresRegister());
2356 } else {
2357 locations->SetInAt(0, Location::Any());
2358 }
2359 locations->SetOut(Location::RequiresRegister());
2360 }
2361
2362 static void GenLeadingZeros(X86Assembler* assembler,
2363 CodeGeneratorX86* codegen,
2364 HInvoke* invoke, bool is_long) {
2365 LocationSummary* locations = invoke->GetLocations();
2366 Location src = locations->InAt(0);
2367 Register out = locations->Out().AsRegister<Register>();
2368
2369 if (invoke->InputAt(0)->IsConstant()) {
2370 // Evaluate this at compile time.
2371 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2372 if (value == 0) {
2373 value = is_long ? 64 : 32;
2374 } else {
2375 value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value));
2376 }
2377 codegen->Load32BitValue(out, value);
2378 return;
2379 }
2380
2381 // Handle the non-constant cases.
2382 if (!is_long) {
2383 if (src.IsRegister()) {
2384 __ bsrl(out, src.AsRegister<Register>());
2385 } else {
2386 DCHECK(src.IsStackSlot());
2387 __ bsrl(out, Address(ESP, src.GetStackIndex()));
2388 }
2389
2390 // BSR sets ZF if the input was zero, and the output is undefined.
2391 NearLabel all_zeroes, done;
2392 __ j(kEqual, &all_zeroes);
2393
2394 // Correct the result from BSR to get the final CLZ result.
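    // For a non-zero 32-bit input, CLZ(x) == 31 - BSR(x); since 0 <= BSR(x) <= 31,
    // 31 - BSR(x) == BSR(x) ^ 31, so a single XOR is enough.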
2395 __ xorl(out, Immediate(31));
2396 __ jmp(&done);
2397
2398 // Fix the zero case with the expected result.
2399 __ Bind(&all_zeroes);
2400 __ movl(out, Immediate(32));
2401
2402 __ Bind(&done);
2403 return;
2404 }
2405
2406 // 64 bit case needs to worry about both parts of the register.
2407 DCHECK(src.IsRegisterPair());
2408 Register src_lo = src.AsRegisterPairLow<Register>();
2409 Register src_hi = src.AsRegisterPairHigh<Register>();
2410 NearLabel handle_low, done, all_zeroes;
2411
2412 // Is the high word zero?
2413 __ testl(src_hi, src_hi);
2414 __ j(kEqual, &handle_low);
2415
2416 // High word is not zero. We know that the BSR result is defined in this case.
2417 __ bsrl(out, src_hi);
2418
2419 // Correct the result from BSR to get the final CLZ result.
2420 __ xorl(out, Immediate(31));
2421 __ jmp(&done);
2422
2423 // High word was zero. We have to compute the low word count and add 32.
2424 __ Bind(&handle_low);
2425 __ bsrl(out, src_lo);
2426 __ j(kEqual, &all_zeroes);
2427
2428 // We had a valid result. Use an XOR to both correct the result and add 32.
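  // (31 - BSR(low)) + 32 == 63 - BSR(low) == BSR(low) ^ 63, again because BSR(low) <= 31.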
2429 __ xorl(out, Immediate(63));
2430 __ jmp(&done);
2431
2432 // All zero case.
2433 __ Bind(&all_zeroes);
2434 __ movl(out, Immediate(64));
2435
2436 __ Bind(&done);
2437 }
2438
2439 void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
2440 CreateLeadingZeroLocations(allocator_, invoke, /* is_long= */ false);
2441 }
2442
2443 void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
2444 GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false);
2445 }
2446
2447 void IntrinsicLocationsBuilderX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
2448 CreateLeadingZeroLocations(allocator_, invoke, /* is_long= */ true);
2449 }
2450
2451 void IntrinsicCodeGeneratorX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
2452 GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true);
2453 }
2454
2455 static void CreateTrailingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_long) {
2456 LocationSummary* locations =
2457 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2458 if (is_long) {
2459 locations->SetInAt(0, Location::RequiresRegister());
2460 } else {
2461 locations->SetInAt(0, Location::Any());
2462 }
2463 locations->SetOut(Location::RequiresRegister());
2464 }
2465
2466 static void GenTrailingZeros(X86Assembler* assembler,
2467 CodeGeneratorX86* codegen,
2468 HInvoke* invoke, bool is_long) {
2469 LocationSummary* locations = invoke->GetLocations();
2470 Location src = locations->InAt(0);
2471 Register out = locations->Out().AsRegister<Register>();
2472
2473 if (invoke->InputAt(0)->IsConstant()) {
2474 // Evaluate this at compile time.
2475 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2476 if (value == 0) {
2477 value = is_long ? 64 : 32;
2478 } else {
2479 value = is_long ? CTZ(static_cast<uint64_t>(value)) : CTZ(static_cast<uint32_t>(value));
2480 }
2481 codegen->Load32BitValue(out, value);
2482 return;
2483 }
2484
2485 // Handle the non-constant cases.
2486 if (!is_long) {
2487 if (src.IsRegister()) {
2488 __ bsfl(out, src.AsRegister<Register>());
2489 } else {
2490 DCHECK(src.IsStackSlot());
2491 __ bsfl(out, Address(ESP, src.GetStackIndex()));
2492 }
2493
2494 // BSF sets ZF if the input was zero, and the output is undefined.
2495 NearLabel done;
2496 __ j(kNotEqual, &done);
2497
2498 // Fix the zero case with the expected result.
2499 __ movl(out, Immediate(32));
2500
2501 __ Bind(&done);
2502 return;
2503 }
2504
2505 // 64 bit case needs to worry about both parts of the register.
2506 DCHECK(src.IsRegisterPair());
2507 Register src_lo = src.AsRegisterPairLow<Register>();
2508 Register src_hi = src.AsRegisterPairHigh<Register>();
2509 NearLabel done, all_zeroes;
2510
2511 // If the low word is zero, then ZF will be set. If not, we have the answer.
2512 __ bsfl(out, src_lo);
2513 __ j(kNotEqual, &done);
2514
2515 // Low word was zero. We have to compute the high word count and add 32.
2516 __ bsfl(out, src_hi);
2517 __ j(kEqual, &all_zeroes);
2518
2519 // We had a valid result. Add 32 to account for the low word being zero.
2520 __ addl(out, Immediate(32));
2521 __ jmp(&done);
2522
2523 // All zero case.
2524 __ Bind(&all_zeroes);
2525 __ movl(out, Immediate(64));
2526
2527 __ Bind(&done);
2528 }
2529
2530 void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
2531 CreateTrailingZeroLocations(allocator_, invoke, /* is_long= */ false);
2532 }
2533
2534 void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
2535 GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false);
2536 }
2537
2538 void IntrinsicLocationsBuilderX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
2539 CreateTrailingZeroLocations(allocator_, invoke, /* is_long= */ true);
2540 }
2541
2542 void IntrinsicCodeGeneratorX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
2543 GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true);
2544 }
2545
2546 static bool IsSameInput(HInstruction* instruction, size_t input0, size_t input1) {
2547 return instruction->InputAt(input0) == instruction->InputAt(input1);
2548 }
2549
2550 // Compute base address for the System.arraycopy intrinsic in `base`.
2551 static void GenSystemArrayCopyBaseAddress(X86Assembler* assembler,
2552 DataType::Type type,
2553 const Register& array,
2554 const Location& pos,
2555 const Register& base) {
2556   // This routine is only used by the SystemArrayCopy intrinsic at the moment, so `type`
2557   // is restricted to DataType::Type::kReference. Allowing other element types here would
2558   // also let us implement the SystemArrayCopyChar intrinsic.
2559 DCHECK_EQ(type, DataType::Type::kReference);
2560 const int32_t element_size = DataType::Size(type);
2561 const ScaleFactor scale_factor = static_cast<ScaleFactor>(DataType::SizeShift(type));
2562 const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
2563
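  // Rough sketch of the addressing math (the concrete values are assumptions, not taken from
  // this file): with 4-byte reference elements and a 12-byte array data offset, a constant
  // `pos` of 3 produces `leal base, [array + 24]`, while a register `pos` produces
  // `leal base, [array + pos*4 + 12]`.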
2564 if (pos.IsConstant()) {
2565 int32_t constant = pos.GetConstant()->AsIntConstant()->GetValue();
2566 __ leal(base, Address(array, element_size * constant + data_offset));
2567 } else {
2568 __ leal(base, Address(array, pos.AsRegister<Register>(), scale_factor, data_offset));
2569 }
2570 }
2571
2572 // Compute end source address for the System.arraycopy intrinsic in `end`.
GenSystemArrayCopyEndAddress(X86Assembler * assembler,DataType::Type type,const Location & copy_length,const Register & base,const Register & end)2573 static void GenSystemArrayCopyEndAddress(X86Assembler* assembler,
2574 DataType::Type type,
2575 const Location& copy_length,
2576 const Register& base,
2577 const Register& end) {
2578   // This routine is only used by the SystemArrayCopy intrinsic at the moment, so `type`
2579   // is restricted to DataType::Type::kReference. Allowing other element types here would
2580   // also let us implement the SystemArrayCopyChar intrinsic.
2581 DCHECK_EQ(type, DataType::Type::kReference);
2582 const int32_t element_size = DataType::Size(type);
2583 const ScaleFactor scale_factor = static_cast<ScaleFactor>(DataType::SizeShift(type));
2584
2585 if (copy_length.IsConstant()) {
2586 int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue();
2587 __ leal(end, Address(base, element_size * constant));
2588 } else {
2589 __ leal(end, Address(base, copy_length.AsRegister<Register>(), scale_factor, 0));
2590 }
2591 }
2592
VisitSystemArrayCopy(HInvoke * invoke)2593 void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) {
2594 // The only read barrier implementation supporting the
2595 // SystemArrayCopy intrinsic is the Baker-style read barriers.
2596 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
2597 return;
2598 }
2599
2600 CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
2601 if (invoke->GetLocations() != nullptr) {
2602 // Need a byte register for marking.
2603 invoke->GetLocations()->SetTempAt(1, Location::RegisterLocation(ECX));
2604
2605 static constexpr size_t kSrc = 0;
2606 static constexpr size_t kSrcPos = 1;
2607 static constexpr size_t kDest = 2;
2608 static constexpr size_t kDestPos = 3;
2609 static constexpr size_t kLength = 4;
2610
2611 if (!invoke->InputAt(kSrcPos)->IsIntConstant() &&
2612 !invoke->InputAt(kDestPos)->IsIntConstant() &&
2613 !invoke->InputAt(kLength)->IsIntConstant()) {
2614 if (!IsSameInput(invoke, kSrcPos, kDestPos) &&
2615 !IsSameInput(invoke, kSrcPos, kLength) &&
2616 !IsSameInput(invoke, kDestPos, kLength) &&
2617 !IsSameInput(invoke, kSrc, kDest)) {
2618 // Not enough registers, make the length also take a stack slot.
2619 invoke->GetLocations()->SetInAt(kLength, Location::Any());
2620 }
2621 }
2622 }
2623 }
2624
VisitSystemArrayCopy(HInvoke * invoke)2625 void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
2626 // The only read barrier implementation supporting the
2627 // SystemArrayCopy intrinsic is the Baker-style read barriers.
2628 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
2629
2630 X86Assembler* assembler = GetAssembler();
2631 LocationSummary* locations = invoke->GetLocations();
2632
2633 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2634 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
2635 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
2636 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
2637 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
2638
2639 Register src = locations->InAt(0).AsRegister<Register>();
2640 Location src_pos = locations->InAt(1);
2641 Register dest = locations->InAt(2).AsRegister<Register>();
2642 Location dest_pos = locations->InAt(3);
2643 Location length_arg = locations->InAt(4);
2644 Location length = length_arg;
2645 Location temp1_loc = locations->GetTemp(0);
2646 Register temp1 = temp1_loc.AsRegister<Register>();
2647 Location temp2_loc = locations->GetTemp(1);
2648 Register temp2 = temp2_loc.AsRegister<Register>();
2649
2650 SlowPathCode* intrinsic_slow_path =
2651 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
2652 codegen_->AddSlowPath(intrinsic_slow_path);
2653
2654 NearLabel conditions_on_positions_validated;
2655 SystemArrayCopyOptimizations optimizations(invoke);
2656
2657   // If source and destination are the same, we go to the slow path when the regions
2658   // overlap in a way that the forward (ascending) copy loop below cannot handle.
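  // For example, arraycopy(a, 1, a, 2, n) copies each element one slot up; an ascending copy
  // would read elements it has already overwritten, so whenever src == dest and
  // dest_pos > src_pos we branch to the slow path instead.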
2659 if (src_pos.IsConstant()) {
2660 int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
2661 if (dest_pos.IsConstant()) {
2662 int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
2663 if (optimizations.GetDestinationIsSource()) {
2664 // Checked when building locations.
2665 DCHECK_GE(src_pos_constant, dest_pos_constant);
2666 } else if (src_pos_constant < dest_pos_constant) {
2667 __ cmpl(src, dest);
2668 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2669 }
2670 } else {
2671 if (!optimizations.GetDestinationIsSource()) {
2672 __ cmpl(src, dest);
2673 __ j(kNotEqual, &conditions_on_positions_validated);
2674 }
2675 __ cmpl(dest_pos.AsRegister<Register>(), Immediate(src_pos_constant));
2676 __ j(kGreater, intrinsic_slow_path->GetEntryLabel());
2677 }
2678 } else {
2679 if (!optimizations.GetDestinationIsSource()) {
2680 __ cmpl(src, dest);
2681 __ j(kNotEqual, &conditions_on_positions_validated);
2682 }
2683 if (dest_pos.IsConstant()) {
2684 int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
2685 __ cmpl(src_pos.AsRegister<Register>(), Immediate(dest_pos_constant));
2686 __ j(kLess, intrinsic_slow_path->GetEntryLabel());
2687 } else {
2688 __ cmpl(src_pos.AsRegister<Register>(), dest_pos.AsRegister<Register>());
2689 __ j(kLess, intrinsic_slow_path->GetEntryLabel());
2690 }
2691 }
2692
2693 __ Bind(&conditions_on_positions_validated);
2694
2695 if (!optimizations.GetSourceIsNotNull()) {
2696 // Bail out if the source is null.
2697 __ testl(src, src);
2698 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2699 }
2700
2701 if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
2702 // Bail out if the destination is null.
2703 __ testl(dest, dest);
2704 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2705 }
2706
2707 Location temp3_loc = locations->GetTemp(2);
2708 Register temp3 = temp3_loc.AsRegister<Register>();
2709 if (length.IsStackSlot()) {
2710 __ movl(temp3, Address(ESP, length.GetStackIndex()));
2711 length = Location::RegisterLocation(temp3);
2712 }
2713
2714 // If the length is negative, bail out.
2715 // We have already checked in the LocationsBuilder for the constant case.
2716 if (!length.IsConstant() &&
2717 !optimizations.GetCountIsSourceLength() &&
2718 !optimizations.GetCountIsDestinationLength()) {
2719 __ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
2720 __ j(kLess, intrinsic_slow_path->GetEntryLabel());
2721 }
2722
2723 // Validity checks: source.
2724 CheckPosition(assembler,
2725 src_pos,
2726 src,
2727 length,
2728 intrinsic_slow_path,
2729 temp1,
2730 optimizations.GetCountIsSourceLength());
2731
2732 // Validity checks: dest.
2733 CheckPosition(assembler,
2734 dest_pos,
2735 dest,
2736 length,
2737 intrinsic_slow_path,
2738 temp1,
2739 optimizations.GetCountIsDestinationLength());
2740
2741 if (!optimizations.GetDoesNotNeedTypeCheck()) {
2742 // Check whether all elements of the source array are assignable to the component
2743 // type of the destination array. We do two checks: the classes are the same,
2744 // or the destination is Object[]. If none of these checks succeed, we go to the
2745 // slow path.
2746
2747 if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2748 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2749 // /* HeapReference<Class> */ temp1 = src->klass_
2750 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2751 invoke, temp1_loc, src, class_offset, /* needs_null_check= */ false);
2752 // Bail out if the source is not a non primitive array.
2753 // /* HeapReference<Class> */ temp1 = temp1->component_type_
2754 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2755 invoke, temp1_loc, temp1, component_offset, /* needs_null_check= */ false);
2756 __ testl(temp1, temp1);
2757 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2758 // If heap poisoning is enabled, `temp1` has been unpoisoned
2759         // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2760 } else {
2761 // /* HeapReference<Class> */ temp1 = src->klass_
2762 __ movl(temp1, Address(src, class_offset));
2763 __ MaybeUnpoisonHeapReference(temp1);
2764 // Bail out if the source is not a non primitive array.
2765 // /* HeapReference<Class> */ temp1 = temp1->component_type_
2766 __ movl(temp1, Address(temp1, component_offset));
2767 __ testl(temp1, temp1);
2768 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2769 __ MaybeUnpoisonHeapReference(temp1);
2770 }
2771 __ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot));
2772 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
2773 }
2774
2775 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2776 if (length.Equals(Location::RegisterLocation(temp3))) {
2777 // When Baker read barriers are enabled, register `temp3`,
2778 // which in the present case contains the `length` parameter,
2779 // will be overwritten below. Make the `length` location
2780 // reference the original stack location; it will be moved
2781 // back to `temp3` later if necessary.
2782 DCHECK(length_arg.IsStackSlot());
2783 length = length_arg;
2784 }
2785
2786 // /* HeapReference<Class> */ temp1 = dest->klass_
2787 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2788 invoke, temp1_loc, dest, class_offset, /* needs_null_check= */ false);
2789
2790 if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
2791 // Bail out if the destination is not a non primitive array.
2792 //
2793 // Register `temp1` is not trashed by the read barrier emitted
2794 // by GenerateFieldLoadWithBakerReadBarrier below, as that
2795 // method produces a call to a ReadBarrierMarkRegX entry point,
2796 // which saves all potentially live registers, including
2797         // temporaries such as `temp1`.
2798 // /* HeapReference<Class> */ temp2 = temp1->component_type_
2799 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2800 invoke, temp2_loc, temp1, component_offset, /* needs_null_check= */ false);
2801 __ testl(temp2, temp2);
2802 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2803 // If heap poisoning is enabled, `temp2` has been unpoisoned
2804         // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2805 __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
2806 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
2807 }
2808
2809 // For the same reason given earlier, `temp1` is not trashed by the
2810 // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
2811 // /* HeapReference<Class> */ temp2 = src->klass_
2812 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2813 invoke, temp2_loc, src, class_offset, /* needs_null_check= */ false);
2814 // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
2815 __ cmpl(temp1, temp2);
2816
2817 if (optimizations.GetDestinationIsTypedObjectArray()) {
2818 NearLabel do_copy;
2819 __ j(kEqual, &do_copy);
2820 // /* HeapReference<Class> */ temp1 = temp1->component_type_
2821 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2822 invoke, temp1_loc, temp1, component_offset, /* needs_null_check= */ false);
2823 // We do not need to emit a read barrier for the following
2824 // heap reference load, as `temp1` is only used in a
2825 // comparison with null below, and this reference is not
2826 // kept afterwards.
2827 __ cmpl(Address(temp1, super_offset), Immediate(0));
2828 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
2829 __ Bind(&do_copy);
2830 } else {
2831 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
2832 }
2833 } else {
2834 // Non read barrier code.
2835
2836 // /* HeapReference<Class> */ temp1 = dest->klass_
2837 __ movl(temp1, Address(dest, class_offset));
2838 if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
2839 __ MaybeUnpoisonHeapReference(temp1);
2840 // Bail out if the destination is not a non primitive array.
2841 // /* HeapReference<Class> */ temp2 = temp1->component_type_
2842 __ movl(temp2, Address(temp1, component_offset));
2843 __ testl(temp2, temp2);
2844 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2845 __ MaybeUnpoisonHeapReference(temp2);
2846 __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
2847 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
2848 // Re-poison the heap reference to make the compare instruction below
2849 // compare two poisoned references.
2850 __ PoisonHeapReference(temp1);
2851 }
2852
2853 // Note: if heap poisoning is on, we are comparing two poisoned references here.
2854 __ cmpl(temp1, Address(src, class_offset));
2855
2856 if (optimizations.GetDestinationIsTypedObjectArray()) {
2857 NearLabel do_copy;
2858 __ j(kEqual, &do_copy);
2859 __ MaybeUnpoisonHeapReference(temp1);
2860 // /* HeapReference<Class> */ temp1 = temp1->component_type_
2861 __ movl(temp1, Address(temp1, component_offset));
2862 __ MaybeUnpoisonHeapReference(temp1);
2863 __ cmpl(Address(temp1, super_offset), Immediate(0));
2864 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
2865 __ Bind(&do_copy);
2866 } else {
2867 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
2868 }
2869 }
2870 } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2871 DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
2872 // Bail out if the source is not a non primitive array.
2873 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2874 // /* HeapReference<Class> */ temp1 = src->klass_
2875 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2876 invoke, temp1_loc, src, class_offset, /* needs_null_check= */ false);
2877 // /* HeapReference<Class> */ temp1 = temp1->component_type_
2878 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2879 invoke, temp1_loc, temp1, component_offset, /* needs_null_check= */ false);
2880 __ testl(temp1, temp1);
2881 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2882 // If heap poisoning is enabled, `temp1` has been unpoisoned
2883       // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2884 } else {
2885 // /* HeapReference<Class> */ temp1 = src->klass_
2886 __ movl(temp1, Address(src, class_offset));
2887 __ MaybeUnpoisonHeapReference(temp1);
2888 // /* HeapReference<Class> */ temp1 = temp1->component_type_
2889 __ movl(temp1, Address(temp1, component_offset));
2890 __ testl(temp1, temp1);
2891 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2892 __ MaybeUnpoisonHeapReference(temp1);
2893 }
2894 __ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot));
2895 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
2896 }
2897
2898 const DataType::Type type = DataType::Type::kReference;
2899 const int32_t element_size = DataType::Size(type);
2900
2901 // Compute the base source address in `temp1`.
2902 GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
2903
2904 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2905 // If it is needed (in the case of the fast-path loop), the base
2906 // destination address is computed later, as `temp2` is used for
2907 // intermediate computations.
2908
2909 // Compute the end source address in `temp3`.
2910 if (length.IsStackSlot()) {
2911 // Location `length` is again pointing at a stack slot, as
2912       // register `temp3` (which held the length parameter earlier)
2913       // has been overwritten; restore it now.
2914 DCHECK(length.Equals(length_arg));
2915 __ movl(temp3, Address(ESP, length.GetStackIndex()));
2916 length = Location::RegisterLocation(temp3);
2917 }
2918 GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
2919
2920 // SystemArrayCopy implementation for Baker read barriers (see
2921 // also CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier):
2922 //
2923 // if (src_ptr != end_ptr) {
2924     //   uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
2925 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
2926 // bool is_gray = (rb_state == ReadBarrier::GrayState());
2927 // if (is_gray) {
2928 // // Slow-path copy.
2929 // for (size_t i = 0; i != length; ++i) {
2930 // dest_array[dest_pos + i] =
2931 // MaybePoison(ReadBarrier::Mark(MaybeUnpoison(src_array[src_pos + i])));
2932 // }
2933 // } else {
2934 // // Fast-path copy.
2935 // do {
2936 // *dest_ptr++ = *src_ptr++;
2937 // } while (src_ptr != end_ptr)
2938 // }
2939 // }
2940
2941 NearLabel loop, done;
2942
2943 // Don't enter copy loop if `length == 0`.
2944 __ cmpl(temp1, temp3);
2945 __ j(kEqual, &done);
2946
2947 // Given the numeric representation, it's enough to check the low bit of the rb_state.
2948 static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
2949 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
2950 constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
2951 constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
2952 constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
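    // Illustration (assuming LockWord::kReadBarrierStateShift == 28, a value not defined in
    // this file): gray_byte_position is 3, gray_bit_position is 4 and test_value is 0x10, so
    // the testb below probes bit 4 of the most significant byte of the lock word.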
2953
2954 // if (rb_state == ReadBarrier::GrayState())
2955 // goto slow_path;
2956 // At this point, just do the "if" and make sure that flags are preserved until the branch.
2957 __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value));
2958
2959 // Load fence to prevent load-load reordering.
2960 // Note that this is a no-op, thanks to the x86 memory model.
2961 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
2962
2963 // Slow path used to copy array when `src` is gray.
2964 SlowPathCode* read_barrier_slow_path =
2965 new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathX86(invoke);
2966 codegen_->AddSlowPath(read_barrier_slow_path);
2967
2968 // We have done the "if" of the gray bit check above, now branch based on the flags.
2969 __ j(kNotZero, read_barrier_slow_path->GetEntryLabel());
2970
2971 // Fast-path copy.
2972 // Compute the base destination address in `temp2`.
2973 GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
2974 // Iterate over the arrays and do a raw copy of the objects. We don't need to
2975 // poison/unpoison.
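    // The pushl/popl pair below performs a memory-to-memory move of one element without
    // needing an extra core register; the cfi() adjustments keep the unwind information
    // accurate while ESP is temporarily displaced by the push.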
2976 __ Bind(&loop);
2977 __ pushl(Address(temp1, 0));
2978 __ cfi().AdjustCFAOffset(4);
2979 __ popl(Address(temp2, 0));
2980 __ cfi().AdjustCFAOffset(-4);
2981 __ addl(temp1, Immediate(element_size));
2982 __ addl(temp2, Immediate(element_size));
2983 __ cmpl(temp1, temp3);
2984 __ j(kNotEqual, &loop);
2985
2986 __ Bind(read_barrier_slow_path->GetExitLabel());
2987 __ Bind(&done);
2988 } else {
2989 // Non read barrier code.
2990 // Compute the base destination address in `temp2`.
2991 GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
2992 // Compute the end source address in `temp3`.
2993 GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
2994 // Iterate over the arrays and do a raw copy of the objects. We don't need to
2995 // poison/unpoison.
2996 NearLabel loop, done;
2997 __ cmpl(temp1, temp3);
2998 __ j(kEqual, &done);
2999 __ Bind(&loop);
3000 __ pushl(Address(temp1, 0));
3001 __ cfi().AdjustCFAOffset(4);
3002 __ popl(Address(temp2, 0));
3003 __ cfi().AdjustCFAOffset(-4);
3004 __ addl(temp1, Immediate(element_size));
3005 __ addl(temp2, Immediate(element_size));
3006 __ cmpl(temp1, temp3);
3007 __ j(kNotEqual, &loop);
3008 __ Bind(&done);
3009 }
3010
3011 // We only need one card marking on the destination array.
3012 codegen_->MarkGCCard(temp1, temp2, dest, Register(kNoRegister), /* value_can_be_null= */ false);
3013
3014 __ Bind(intrinsic_slow_path->GetExitLabel());
3015 }
3016
RequestBaseMethodAddressInRegister(HInvoke * invoke)3017 static void RequestBaseMethodAddressInRegister(HInvoke* invoke) {
3018 LocationSummary* locations = invoke->GetLocations();
3019 if (locations != nullptr) {
3020 HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect();
3021 // Note: The base method address is not present yet when this is called from the
3022 // PCRelativeHandlerVisitor via IsCallFreeIntrinsic() to determine whether to insert it.
3023 if (invoke_static_or_direct->HasSpecialInput()) {
3024 DCHECK(invoke_static_or_direct->InputAt(invoke_static_or_direct->GetSpecialInputIndex())
3025 ->IsX86ComputeBaseMethodAddress());
3026 locations->SetInAt(invoke_static_or_direct->GetSpecialInputIndex(),
3027 Location::RequiresRegister());
3028 }
3029 }
3030 }
3031
VisitIntegerValueOf(HInvoke * invoke)3032 void IntrinsicLocationsBuilderX86::VisitIntegerValueOf(HInvoke* invoke) {
3033 DCHECK(invoke->IsInvokeStaticOrDirect());
3034 InvokeRuntimeCallingConvention calling_convention;
3035 IntrinsicVisitor::ComputeIntegerValueOfLocations(
3036 invoke,
3037 codegen_,
3038 Location::RegisterLocation(EAX),
3039 Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
3040 RequestBaseMethodAddressInRegister(invoke);
3041 }
3042
VisitIntegerValueOf(HInvoke * invoke)3043 void IntrinsicCodeGeneratorX86::VisitIntegerValueOf(HInvoke* invoke) {
3044 DCHECK(invoke->IsInvokeStaticOrDirect());
3045 IntrinsicVisitor::IntegerValueOfInfo info =
3046 IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions());
3047 LocationSummary* locations = invoke->GetLocations();
3048 X86Assembler* assembler = GetAssembler();
3049
3050 Register out = locations->Out().AsRegister<Register>();
3051 auto allocate_instance = [&]() {
3052 DCHECK_EQ(out, InvokeRuntimeCallingConvention().GetRegisterAt(0));
3053 codegen_->LoadIntrinsicDeclaringClass(out, invoke->AsInvokeStaticOrDirect());
3054 codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
3055 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
3056 };
3057 if (invoke->InputAt(0)->IsConstant()) {
3058 int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
3059 if (static_cast<uint32_t>(value - info.low) < info.length) {
3060 // Just embed the j.l.Integer in the code.
3061 DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference);
3062 codegen_->LoadBootImageAddress(
3063 out, info.value_boot_image_reference, invoke->AsInvokeStaticOrDirect());
3064 } else {
3065 DCHECK(locations->CanCall());
3066 // Allocate and initialize a new j.l.Integer.
3067 // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
3068 // JIT object table.
3069 allocate_instance();
3070 __ movl(Address(out, info.value_offset), Immediate(value));
3071 }
3072 } else {
3073 DCHECK(locations->CanCall());
3074 Register in = locations->InAt(0).AsRegister<Register>();
3075 // Check bounds of our cache.
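    // The leal/cmpl pair below performs the signed range check `low <= value < low + length`
    // with a single unsigned comparison. For example, with the usual Integer cache of
    // -128..127 (an assumption here), an input of -200 or of 1000 maps to an unsigned index
    // >= 256 and branches to the allocation path.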
3076 __ leal(out, Address(in, -info.low));
3077 __ cmpl(out, Immediate(info.length));
3078 NearLabel allocate, done;
3079 __ j(kAboveEqual, &allocate);
3080 // If the value is within the bounds, load the j.l.Integer directly from the array.
3081 constexpr size_t kElementSize = sizeof(mirror::HeapReference<mirror::Object>);
3082 static_assert((1u << TIMES_4) == sizeof(mirror::HeapReference<mirror::Object>),
3083 "Check heap reference size.");
3084 if (codegen_->GetCompilerOptions().IsBootImage()) {
3085 DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
3086 size_t method_address_index = invoke->AsInvokeStaticOrDirect()->GetSpecialInputIndex();
3087 HX86ComputeBaseMethodAddress* method_address =
3088 invoke->InputAt(method_address_index)->AsX86ComputeBaseMethodAddress();
3089 DCHECK(method_address != nullptr);
3090 Register method_address_reg =
3091 invoke->GetLocations()->InAt(method_address_index).AsRegister<Register>();
3092 __ movl(out,
3093 Address(method_address_reg, out, TIMES_4, CodeGeneratorX86::kPlaceholder32BitOffset));
3094 codegen_->RecordBootImageIntrinsicPatch(method_address, info.array_data_boot_image_reference);
3095 } else {
3096 // Note: We're about to clobber the index in `out`, so we need to use `in` and
3097 // adjust the offset accordingly.
3098 uint32_t mid_array_boot_image_offset =
3099 info.array_data_boot_image_reference - info.low * kElementSize;
3100 codegen_->LoadBootImageAddress(
3101 out, mid_array_boot_image_offset, invoke->AsInvokeStaticOrDirect());
3102 DCHECK_NE(out, in);
3103 __ movl(out, Address(out, in, TIMES_4, 0));
3104 }
3105 __ MaybeUnpoisonHeapReference(out);
3106 __ jmp(&done);
3107 __ Bind(&allocate);
3108 // Otherwise allocate and initialize a new j.l.Integer.
3109 allocate_instance();
3110 __ movl(Address(out, info.value_offset), in);
3111 __ Bind(&done);
3112 }
3113 }
3114
VisitReferenceGetReferent(HInvoke * invoke)3115 void IntrinsicLocationsBuilderX86::VisitReferenceGetReferent(HInvoke* invoke) {
3116 IntrinsicVisitor::CreateReferenceGetReferentLocations(invoke, codegen_);
3117 RequestBaseMethodAddressInRegister(invoke);
3118 }
3119
VisitReferenceGetReferent(HInvoke * invoke)3120 void IntrinsicCodeGeneratorX86::VisitReferenceGetReferent(HInvoke* invoke) {
3121 X86Assembler* assembler = GetAssembler();
3122 LocationSummary* locations = invoke->GetLocations();
3123
3124 Location obj = locations->InAt(0);
3125 Location out = locations->Out();
3126
3127 SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
3128 codegen_->AddSlowPath(slow_path);
3129
3130 if (kEmitCompilerReadBarrier) {
3131 // Check self->GetWeakRefAccessEnabled().
3132 ThreadOffset32 offset = Thread::WeakRefAccessEnabledOffset<kX86PointerSize>();
3133 __ fs()->cmpl(Address::Absolute(offset), Immediate(0));
3134 __ j(kEqual, slow_path->GetEntryLabel());
3135 }
3136
3137 // Load the java.lang.ref.Reference class, use the output register as a temporary.
3138 codegen_->LoadIntrinsicDeclaringClass(out.AsRegister<Register>(),
3139 invoke->AsInvokeStaticOrDirect());
3140
3141 // Check static fields java.lang.ref.Reference.{disableIntrinsic,slowPathEnabled} together.
3142 MemberOffset disable_intrinsic_offset = IntrinsicVisitor::GetReferenceDisableIntrinsicOffset();
3143 DCHECK_ALIGNED(disable_intrinsic_offset.Uint32Value(), 2u);
3144 DCHECK_EQ(disable_intrinsic_offset.Uint32Value() + 1u,
3145 IntrinsicVisitor::GetReferenceSlowPathEnabledOffset().Uint32Value());
3146 __ cmpw(Address(out.AsRegister<Register>(), disable_intrinsic_offset.Uint32Value()),
3147 Immediate(0));
3148 __ j(kNotEqual, slow_path->GetEntryLabel());
3149
3150 // Load the value from the field.
3151 uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
3152 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
3153 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
3154 out,
3155 obj.AsRegister<Register>(),
3156 referent_offset,
3157 /*needs_null_check=*/ true);
3158 // Note that the fence is a no-op, thanks to the x86 memory model.
3159 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); // `referent` is volatile.
3160 } else {
3161 __ movl(out.AsRegister<Register>(), Address(obj.AsRegister<Register>(), referent_offset));
3162 codegen_->MaybeRecordImplicitNullCheck(invoke);
3163 // Note that the fence is a no-op, thanks to the x86 memory model.
3164 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); // `referent` is volatile.
3165 codegen_->MaybeGenerateReadBarrierSlow(invoke, out, out, obj, referent_offset);
3166 }
3167 __ Bind(slow_path->GetExitLabel());
3168 }
3169
VisitReferenceRefersTo(HInvoke * invoke)3170 void IntrinsicLocationsBuilderX86::VisitReferenceRefersTo(HInvoke* invoke) {
3171 IntrinsicVisitor::CreateReferenceRefersToLocations(invoke);
3172 }
3173
VisitReferenceRefersTo(HInvoke * invoke)3174 void IntrinsicCodeGeneratorX86::VisitReferenceRefersTo(HInvoke* invoke) {
3175 X86Assembler* assembler = GetAssembler();
3176 LocationSummary* locations = invoke->GetLocations();
3177
3178 Register obj = locations->InAt(0).AsRegister<Register>();
3179 Register other = locations->InAt(1).AsRegister<Register>();
3180 Register out = locations->Out().AsRegister<Register>();
3181
3182 uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
3183 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
3184
3185 __ movl(out, Address(obj, referent_offset));
3186 codegen_->MaybeRecordImplicitNullCheck(invoke);
3187 __ MaybeUnpoisonHeapReference(out);
3188 // Note that the fence is a no-op, thanks to the x86 memory model.
3189 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); // `referent` is volatile.
3190
3191 NearLabel end, return_true, return_false;
3192 __ cmpl(out, other);
3193
3194 if (kEmitCompilerReadBarrier) {
3195 DCHECK(kUseBakerReadBarrier);
3196
3197 __ j(kEqual, &return_true);
3198
3199 // Check if the loaded reference is null.
3200 __ testl(out, out);
3201 __ j(kZero, &return_false);
3202
3203     // For correct memory visibility, we need a barrier before loading the lock word,
3204     // but the barrier already emitted above for the volatile load is sufficient.
3205
3206 // Load the lockword and check if it is a forwarding address.
3207 static_assert(LockWord::kStateShift == 30u);
3208 static_assert(LockWord::kStateForwardingAddress == 3u);
3209 __ movl(out, Address(out, monitor_offset));
3210 __ cmpl(out, Immediate(static_cast<int32_t>(0xc0000000)));
3211 __ j(kBelow, &return_false);
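    // Sketch of the encoding (assuming LockWord::kForwardingAddressShift == 3, i.e. 8-byte
    // aligned objects): a forwarded object at 0x12345678 has lock word 0xc2468acf; the shll
    // below shifts the two state bits out and recovers 0x12345678 for the comparison against
    // `other`.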
3212
3213 // Extract the forwarding address and compare with `other`.
3214 __ shll(out, Immediate(LockWord::kForwardingAddressShift));
3215 __ cmpl(out, other);
3216 }
3217
3218 __ j(kNotEqual, &return_false);
3219
3220 // Return true and exit the function.
3221 __ Bind(&return_true);
3222 __ movl(out, Immediate(1));
3223 __ jmp(&end);
3224
3225 // Return false and exit the function.
3226 __ Bind(&return_false);
3227 __ xorl(out, out);
3228 __ Bind(&end);
3229 }
3230
VisitThreadInterrupted(HInvoke * invoke)3231 void IntrinsicLocationsBuilderX86::VisitThreadInterrupted(HInvoke* invoke) {
3232 LocationSummary* locations =
3233 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3234 locations->SetOut(Location::RequiresRegister());
3235 }
3236
VisitThreadInterrupted(HInvoke * invoke)3237 void IntrinsicCodeGeneratorX86::VisitThreadInterrupted(HInvoke* invoke) {
3238 X86Assembler* assembler = GetAssembler();
3239 Register out = invoke->GetLocations()->Out().AsRegister<Register>();
3240 Address address = Address::Absolute(Thread::InterruptedOffset<kX86PointerSize>().Int32Value());
3241 NearLabel done;
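  // Test-and-clear: read the per-thread interrupted flag through the fs: segment and only
  // write zero back (and pay for the memory fence) when it was set, keeping the common
  // uninterrupted path to a single load.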
3242 __ fs()->movl(out, address);
3243 __ testl(out, out);
3244 __ j(kEqual, &done);
3245 __ fs()->movl(address, Immediate(0));
3246 codegen_->MemoryFence();
3247 __ Bind(&done);
3248 }
3249
VisitReachabilityFence(HInvoke * invoke)3250 void IntrinsicLocationsBuilderX86::VisitReachabilityFence(HInvoke* invoke) {
3251 LocationSummary* locations =
3252 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3253 locations->SetInAt(0, Location::Any());
3254 }
3255
VisitReachabilityFence(HInvoke * invoke ATTRIBUTE_UNUSED)3256 void IntrinsicCodeGeneratorX86::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { }
3257
VisitIntegerDivideUnsigned(HInvoke * invoke)3258 void IntrinsicLocationsBuilderX86::VisitIntegerDivideUnsigned(HInvoke* invoke) {
3259 LocationSummary* locations = new (allocator_) LocationSummary(invoke,
3260 LocationSummary::kCallOnSlowPath,
3261 kIntrinsified);
3262 locations->SetInAt(0, Location::RegisterLocation(EAX));
3263 locations->SetInAt(1, Location::RequiresRegister());
3264 locations->SetOut(Location::SameAsFirstInput());
3265   // The x86 unsigned divide instruction uses edx:eax as the dividend.
3266 locations->AddTemp(Location::RegisterLocation(EDX));
3267 }
3268
VisitIntegerDivideUnsigned(HInvoke * invoke)3269 void IntrinsicCodeGeneratorX86::VisitIntegerDivideUnsigned(HInvoke* invoke) {
3270 X86Assembler* assembler = GetAssembler();
3271 LocationSummary* locations = invoke->GetLocations();
3272 Location out = locations->Out();
3273 Location first = locations->InAt(0);
3274 Location second = locations->InAt(1);
3275 Register edx = locations->GetTemp(0).AsRegister<Register>();
3276 Register second_reg = second.AsRegister<Register>();
3277
3278 DCHECK_EQ(EAX, first.AsRegister<Register>());
3279 DCHECK_EQ(EAX, out.AsRegister<Register>());
3280 DCHECK_EQ(EDX, edx);
3281
3282 // Check if divisor is zero, bail to managed implementation to handle.
3283 __ testl(second_reg, second_reg);
3284 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
3285 codegen_->AddSlowPath(slow_path);
3286 __ j(kEqual, slow_path->GetEntryLabel());
3287
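  // Zero-extend the dividend into edx:eax and divide unsigned; the quotient ends up in EAX,
  // which is also the out register. For example, Integer.divideUnsigned(-2, 3) treats the
  // dividend as 4294967294 and yields 1431655764.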
3288 __ xorl(edx, edx);
3289 __ divl(second_reg);
3290
3291 __ Bind(slow_path->GetExitLabel());
3292 }
3293
IsValidFieldVarHandleExpected(HInvoke * invoke)3294 static bool IsValidFieldVarHandleExpected(HInvoke* invoke) {
3295 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3296 if (expected_coordinates_count > 1u) {
3297     // Only static and instance field VarHandles are supported for now.
3298 return false;
3299 }
3300
3301 if (expected_coordinates_count == 1u &&
3302 invoke->InputAt(1)->GetType() != DataType::Type::kReference) {
3303 // For instance fields, the source object must be a reference
3304 return false;
3305 }
3306
3307 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
3308 DataType::Type return_type = invoke->GetType();
3309 mirror::VarHandle::AccessModeTemplate access_mode_template =
3310 mirror::VarHandle::GetAccessModeTemplateByIntrinsic(invoke->GetIntrinsic());
3311 switch (access_mode_template) {
3312 case mirror::VarHandle::AccessModeTemplate::kGet:
3313 // The return type should be the same as varType, so it shouldn't be void.
3314 if (return_type == DataType::Type::kVoid) {
3315 return false;
3316 }
3317 break;
3318 case mirror::VarHandle::AccessModeTemplate::kSet:
3319 if (return_type != DataType::Type::kVoid) {
3320 return false;
3321 }
3322 break;
3323 case mirror::VarHandle::AccessModeTemplate::kCompareAndSet: {
3324 if (return_type != DataType::Type::kBool) {
3325 return false;
3326 }
3327 uint32_t expected_value_index = number_of_arguments - 2;
3328 uint32_t new_value_index = number_of_arguments - 1;
3329 DataType::Type expected_value_type = GetDataTypeFromShorty(invoke, expected_value_index);
3330 DataType::Type new_value_type = GetDataTypeFromShorty(invoke, new_value_index);
3331
3332 if (expected_value_type != new_value_type) {
3333 return false;
3334 }
3335 break;
3336 }
3337 case mirror::VarHandle::AccessModeTemplate::kGetAndUpdate: {
3338 DataType::Type value_type = GetDataTypeFromShorty(invoke, number_of_arguments - 1);
3339 if (IsVarHandleGetAndAdd(invoke) &&
3340 (value_type == DataType::Type::kReference || value_type == DataType::Type::kBool)) {
3341 // We should only add numerical types.
3342 return false;
3343 } else if (IsVarHandleGetAndBitwiseOp(invoke) && !DataType::IsIntegralType(value_type)) {
3344 // We can only apply operators to bitwise integral types.
3345 // Note that bitwise VarHandle operations accept a non-integral boolean type and
3346 // perform the appropriate logical operation. However, the result is the same as
3347 // using the bitwise operation on our boolean representation and this fits well
3348 // with DataType::IsIntegralType() treating the compiler type kBool as integral.
3349 return false;
3350 }
3351 if (value_type != return_type) {
3352 return false;
3353 }
3354 break;
3355 }
3356 case mirror::VarHandle::AccessModeTemplate::kCompareAndExchange: {
3357 uint32_t expected_value_index = number_of_arguments - 2;
3358 uint32_t new_value_index = number_of_arguments - 1;
3359 DataType::Type expected_value_type = GetDataTypeFromShorty(invoke, expected_value_index);
3360 DataType::Type new_value_type = GetDataTypeFromShorty(invoke, new_value_index);
3361
3362 if (expected_value_type != new_value_type || return_type != expected_value_type) {
3363 return false;
3364 }
3365 break;
3366 }
3367 }
3368
3369 return true;
3370 }
3371
GenerateVarHandleAccessModeCheck(Register varhandle_object,mirror::VarHandle::AccessMode access_mode,SlowPathCode * slow_path,X86Assembler * assembler)3372 static void GenerateVarHandleAccessModeCheck(Register varhandle_object,
3373 mirror::VarHandle::AccessMode access_mode,
3374 SlowPathCode* slow_path,
3375 X86Assembler* assembler) {
3376 const uint32_t access_modes_bitmask_offset =
3377 mirror::VarHandle::AccessModesBitMaskOffset().Uint32Value();
3378 const uint32_t access_mode_bit = 1u << static_cast<uint32_t>(access_mode);
3379
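  // Each supported access mode contributes one bit to the bitmask field; e.g. an access mode
  // with enum value 5 would be tested with the mask 1u << 5 == 0x20 (the concrete ordinal is
  // illustrative and comes from mirror::VarHandle::AccessMode).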
3380   // If the access mode is not supported, bail to the runtime implementation to handle it.
3381 __ testl(Address(varhandle_object, access_modes_bitmask_offset), Immediate(access_mode_bit));
3382 __ j(kZero, slow_path->GetEntryLabel());
3383 }
3384
GenerateVarHandleStaticFieldCheck(Register varhandle_object,SlowPathCode * slow_path,X86Assembler * assembler)3385 static void GenerateVarHandleStaticFieldCheck(Register varhandle_object,
3386 SlowPathCode* slow_path,
3387 X86Assembler* assembler) {
3388 const uint32_t coordtype0_offset = mirror::VarHandle::CoordinateType0Offset().Uint32Value();
3389
3390 // Check that the VarHandle references a static field by checking that coordinateType0 == null.
3391 // Do not emit read barrier (or unpoison the reference) for comparing to null.
3392 __ cmpl(Address(varhandle_object, coordtype0_offset), Immediate(0));
3393 __ j(kNotEqual, slow_path->GetEntryLabel());
3394 }
3395
GenerateSubTypeObjectCheck(Register object,Register temp,Address type_address,SlowPathCode * slow_path,X86Assembler * assembler,bool object_can_be_null=true)3396 static void GenerateSubTypeObjectCheck(Register object,
3397 Register temp,
3398 Address type_address,
3399 SlowPathCode* slow_path,
3400 X86Assembler* assembler,
3401 bool object_can_be_null = true) {
3402 const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
3403 const uint32_t super_class_offset = mirror::Class::SuperClassOffset().Uint32Value();
3404 NearLabel check_type_compatibility, type_matched;
3405
3406 // If the object is null, there is no need to check the type
3407 if (object_can_be_null) {
3408 __ testl(object, object);
3409 __ j(kZero, &type_matched);
3410 }
3411
3412 // Do not unpoison for in-memory comparison.
3413 // We deliberately avoid the read barrier, letting the slow path handle the false negatives.
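  // Illustrative walk (not emitted code): checking an Integer object against a Number type,
  // the first compare of Integer's class fails, the loop loads the super class (Number) and
  // the second compare matches; reaching a null super class without a match (such as for an
  // interface type) falls back to the slow path.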
3414 __ movl(temp, Address(object, class_offset));
3415 __ Bind(&check_type_compatibility);
3416 __ cmpl(temp, type_address);
3417 __ j(kEqual, &type_matched);
3418 // Load the super class.
3419 __ MaybeUnpoisonHeapReference(temp);
3420 __ movl(temp, Address(temp, super_class_offset));
3421 // If the super class is null, we reached the root of the hierarchy without a match.
3422 // We let the slow path handle uncovered cases (e.g. interfaces).
3423 __ testl(temp, temp);
3424 __ j(kEqual, slow_path->GetEntryLabel());
3425 __ jmp(&check_type_compatibility);
3426 __ Bind(&type_matched);
3427 }
3428
GenerateVarHandleInstanceFieldObjectCheck(Register varhandle_object,Register object,Register temp,SlowPathCode * slow_path,X86Assembler * assembler)3429 static void GenerateVarHandleInstanceFieldObjectCheck(Register varhandle_object,
3430 Register object,
3431 Register temp,
3432 SlowPathCode* slow_path,
3433 X86Assembler* assembler) {
3434 const uint32_t coordtype0_offset = mirror::VarHandle::CoordinateType0Offset().Uint32Value();
3435 const uint32_t coordtype1_offset = mirror::VarHandle::CoordinateType1Offset().Uint32Value();
3436
3437 // Check that the VarHandle references an instance field by checking that
3438   // coordinateType1 == null. coordinateType0 should not be null, but this is handled by the
3439 // type compatibility check with the source object's type, which will fail for null.
3440 __ cmpl(Address(varhandle_object, coordtype1_offset), Immediate(0));
3441 __ j(kNotEqual, slow_path->GetEntryLabel());
3442
3443 // Check if the object is null
3444 __ testl(object, object);
3445 __ j(kZero, slow_path->GetEntryLabel());
3446
3447 // Check the object's class against coordinateType0.
3448 GenerateSubTypeObjectCheck(object,
3449 temp,
3450 Address(varhandle_object, coordtype0_offset),
3451 slow_path,
3452 assembler,
3453 /* object_can_be_null= */ false);
3454 }
3455
GenerateVarTypePrimitiveTypeCheck(Register varhandle_object,Register temp,DataType::Type type,SlowPathCode * slow_path,X86Assembler * assembler)3456 static void GenerateVarTypePrimitiveTypeCheck(Register varhandle_object,
3457 Register temp,
3458 DataType::Type type,
3459 SlowPathCode* slow_path,
3460 X86Assembler* assembler) {
3461 const uint32_t var_type_offset = mirror::VarHandle::VarTypeOffset().Uint32Value();
3462 const uint32_t primitive_type_offset = mirror::Class::PrimitiveTypeOffset().Uint32Value();
3463 const uint32_t primitive_type = static_cast<uint32_t>(DataTypeToPrimitive(type));
3464
3465   // We do not need a read barrier when loading a reference only to read a constant
3466   // field through that reference.
3467 __ movl(temp, Address(varhandle_object, var_type_offset));
3468 __ MaybeUnpoisonHeapReference(temp);
3469 __ cmpw(Address(temp, primitive_type_offset), Immediate(primitive_type));
3470 __ j(kNotEqual, slow_path->GetEntryLabel());
3471 }
3472
GenerateVarHandleCommonChecks(HInvoke * invoke,Register temp,SlowPathCode * slow_path,X86Assembler * assembler)3473 static void GenerateVarHandleCommonChecks(HInvoke *invoke,
3474 Register temp,
3475 SlowPathCode* slow_path,
3476 X86Assembler* assembler) {
3477 LocationSummary* locations = invoke->GetLocations();
3478 Register vh_object = locations->InAt(0).AsRegister<Register>();
3479 mirror::VarHandle::AccessMode access_mode =
3480 mirror::VarHandle::GetAccessModeByIntrinsic(invoke->GetIntrinsic());
3481
3482 GenerateVarHandleAccessModeCheck(vh_object,
3483 access_mode,
3484 slow_path,
3485 assembler);
3486
3487 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3488 switch (expected_coordinates_count) {
3489 case 0u:
3490 GenerateVarHandleStaticFieldCheck(vh_object, slow_path, assembler);
3491 break;
3492 case 1u: {
3493 Register object = locations->InAt(1).AsRegister<Register>();
3494 GenerateVarHandleInstanceFieldObjectCheck(vh_object, object, temp, slow_path, assembler);
3495 break;
3496 }
3497 default:
3498 // Unimplemented
3499 UNREACHABLE();
3500 }
3501
3502 // Check the return type and varType parameters.
3503 mirror::VarHandle::AccessModeTemplate access_mode_template =
3504 mirror::VarHandle::GetAccessModeTemplate(access_mode);
3505 DataType::Type type = invoke->GetType();
3506
3507 switch (access_mode_template) {
3508 case mirror::VarHandle::AccessModeTemplate::kGet:
3509 // Check the varType.primitiveType against the type we're trying to retrieve. Reference types
3510 // are also checked later by a HCheckCast node as an additional check.
3511 GenerateVarTypePrimitiveTypeCheck(vh_object, temp, type, slow_path, assembler);
3512 break;
3513 case mirror::VarHandle::AccessModeTemplate::kSet:
3514 case mirror::VarHandle::AccessModeTemplate::kGetAndUpdate: {
3515 uint32_t value_index = invoke->GetNumberOfArguments() - 1;
3516 DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
3517
3518 // Check the varType.primitiveType against the type of the value we're trying to set.
3519 GenerateVarTypePrimitiveTypeCheck(vh_object, temp, value_type, slow_path, assembler);
3520 if (value_type == DataType::Type::kReference) {
3521 const uint32_t var_type_offset = mirror::VarHandle::VarTypeOffset().Uint32Value();
3522
3523 // If the value type is a reference, check it against the varType.
3524 GenerateSubTypeObjectCheck(locations->InAt(value_index).AsRegister<Register>(),
3525 temp,
3526 Address(vh_object, var_type_offset),
3527 slow_path,
3528 assembler);
3529 }
3530 break;
3531 }
3532 case mirror::VarHandle::AccessModeTemplate::kCompareAndSet:
3533 case mirror::VarHandle::AccessModeTemplate::kCompareAndExchange: {
3534 uint32_t new_value_index = invoke->GetNumberOfArguments() - 1;
3535 uint32_t expected_value_index = invoke->GetNumberOfArguments() - 2;
3536 DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index);
3537 DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, expected_value_index));
3538
3539 // Check the varType.primitiveType against the type of the expected value.
3540 GenerateVarTypePrimitiveTypeCheck(vh_object, temp, value_type, slow_path, assembler);
3541 if (value_type == DataType::Type::kReference) {
3542 const uint32_t var_type_offset = mirror::VarHandle::VarTypeOffset().Uint32Value();
3543
3544 // If the value type is a reference, check both the expected and the new value against
3545 // the varType.
3546 GenerateSubTypeObjectCheck(locations->InAt(new_value_index).AsRegister<Register>(),
3547 temp,
3548 Address(vh_object, var_type_offset),
3549 slow_path,
3550 assembler);
3551 GenerateSubTypeObjectCheck(locations->InAt(expected_value_index).AsRegister<Register>(),
3552 temp,
3553 Address(vh_object, var_type_offset),
3554 slow_path,
3555 assembler);
3556 }
3557 break;
3558 }
3559 }
3560 }
3561
3562 // This method loads the field's address referred to by a field VarHandle (base + offset).
3563 // The return value is the register containing the object's reference (for an instance field)
3564 // or the declaring class (for a static field); the declaring class is stored in the `temp`
3565 // register. The field's offset is loaded into the `offset` register.
GenerateVarHandleFieldReference(HInvoke * invoke,CodeGeneratorX86 * codegen,Register temp,Register offset)3566 static Register GenerateVarHandleFieldReference(HInvoke* invoke,
3567 CodeGeneratorX86* codegen,
3568 Register temp,
3569 /*out*/ Register offset) {
3570 X86Assembler* assembler = codegen->GetAssembler();
3571 LocationSummary* locations = invoke->GetLocations();
3572 const uint32_t artfield_offset = mirror::FieldVarHandle::ArtFieldOffset().Uint32Value();
3573 const uint32_t offset_offset = ArtField::OffsetOffset().Uint32Value();
3574 const uint32_t declaring_class_offset = ArtField::DeclaringClassOffset().Uint32Value();
3575 Register varhandle_object = locations->InAt(0).AsRegister<Register>();
3576
3577 // Load the ArtField and the offset
3578 __ movl(temp, Address(varhandle_object, artfield_offset));
3579 __ movl(offset, Address(temp, offset_offset));
3580 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3581 if (expected_coordinates_count == 0) {
3582 // For static fields, load the declaring class
3583 InstructionCodeGeneratorX86* instr_codegen =
3584 down_cast<InstructionCodeGeneratorX86*>(codegen->GetInstructionVisitor());
3585 instr_codegen->GenerateGcRootFieldLoad(invoke,
3586 Location::RegisterLocation(temp),
3587 Address(temp, declaring_class_offset),
3588 /* fixup_label= */ nullptr,
3589 kCompilerReadBarrierOption);
3590 return temp;
3591 }
3592
3593 // For instance fields, return the register containing the object.
3594 DCHECK_EQ(expected_coordinates_count, 1u);
3595
3596 return locations->InAt(1).AsRegister<Register>();
3597 }
3598
CreateVarHandleGetLocations(HInvoke * invoke)3599 static void CreateVarHandleGetLocations(HInvoke* invoke) {
3600 // The only read barrier implementation supporting the
3601 // VarHandleGet intrinsic is the Baker-style read barriers.
3602 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
3603 return;
3604 }
3605
3606 if (!IsValidFieldVarHandleExpected(invoke)) {
3607 return;
3608 }
3609
3610 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
3611 LocationSummary* locations = new (allocator) LocationSummary(
3612 invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
3613 locations->SetInAt(0, Location::RequiresRegister());
3614 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3615 if (expected_coordinates_count == 1u) {
3616 // For instance fields, this is the source object.
3617 locations->SetInAt(1, Location::RequiresRegister());
3618 }
3619 locations->AddTemp(Location::RequiresRegister());
3620
3621 DataType::Type type = invoke->GetType();
3622 switch (DataType::Kind(type)) {
3623 case DataType::Type::kInt64:
3624 locations->AddTemp(Location::RequiresRegister());
3625 if (invoke->GetIntrinsic() != Intrinsics::kVarHandleGet) {
3626 // We need an XmmRegister for Int64 to ensure an atomic load
3627 locations->AddTemp(Location::RequiresFpuRegister());
3628 }
3629 FALLTHROUGH_INTENDED;
3630 case DataType::Type::kInt32:
3631 case DataType::Type::kReference:
3632 locations->SetOut(Location::RequiresRegister());
3633 break;
3634 default:
3635 DCHECK(DataType::IsFloatingPointType(type));
3636 locations->AddTemp(Location::RequiresRegister());
3637 locations->SetOut(Location::RequiresFpuRegister());
3638 }
3639 }
3640
GenerateVarHandleGet(HInvoke * invoke,CodeGeneratorX86 * codegen)3641 static void GenerateVarHandleGet(HInvoke* invoke, CodeGeneratorX86* codegen) {
3642 // The only read barrier implementation supporting the
3643 // VarHandleGet intrinsic is the Baker-style read barriers.
3644 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
3645
3646 X86Assembler* assembler = codegen->GetAssembler();
3647 LocationSummary* locations = invoke->GetLocations();
3648 DataType::Type type = invoke->GetType();
3649 DCHECK_NE(type, DataType::Type::kVoid);
3650 Register temp = locations->GetTemp(0).AsRegister<Register>();
3651 SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
3652 codegen->AddSlowPath(slow_path);
3653
3654 GenerateVarHandleCommonChecks(invoke, temp, slow_path, assembler);
3655
3656 Location out = locations->Out();
3657 // Use 'out' as a temporary register if it's a core register
3658 Register offset =
3659 out.IsRegister() ? out.AsRegister<Register>() : locations->GetTemp(1).AsRegister<Register>();
3660
3661 // Get the field referred by the VarHandle. The returned register contains the object reference
3662 // or the declaring class. The field offset will be placed in 'offset'. For static fields, the
3663 // declaring class will be placed in 'temp' register.
3664 Register ref = GenerateVarHandleFieldReference(invoke, codegen, temp, offset);
3665 Address field_addr(ref, offset, TIMES_1, 0);
3666
3667 // Load the value from the field
3668 if (type == DataType::Type::kReference && kCompilerReadBarrierOption == kWithReadBarrier) {
3669 codegen->GenerateReferenceLoadWithBakerReadBarrier(
3670 invoke, out, ref, field_addr, /* needs_null_check= */ false);
3671 } else if (type == DataType::Type::kInt64 &&
3672 invoke->GetIntrinsic() != Intrinsics::kVarHandleGet) {
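    // On 32-bit x86 a 64-bit value would otherwise be loaded as two separate 32-bit moves,
    // which is not atomic; going through an XMM register performs a single 8-byte load.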
3673 XmmRegister xmm_temp = locations->GetTemp(2).AsFpuRegister<XmmRegister>();
3674 codegen->LoadFromMemoryNoBarrier(type, out, field_addr, xmm_temp, /* is_atomic_load= */ true);
3675 } else {
3676 codegen->LoadFromMemoryNoBarrier(type, out, field_addr);
3677 }
3678
3679 if (invoke->GetIntrinsic() == Intrinsics::kVarHandleGetVolatile ||
3680 invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAcquire) {
3681 // Load fence to prevent load-load reordering.
3682 // Note that this is a no-op, thanks to the x86 memory model.
3683 codegen->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
3684 }
3685
3686 __ Bind(slow_path->GetExitLabel());
3687 }
3688
VisitVarHandleGet(HInvoke * invoke)3689 void IntrinsicLocationsBuilderX86::VisitVarHandleGet(HInvoke* invoke) {
3690 CreateVarHandleGetLocations(invoke);
3691 }
3692
VisitVarHandleGet(HInvoke * invoke)3693 void IntrinsicCodeGeneratorX86::VisitVarHandleGet(HInvoke* invoke) {
3694 GenerateVarHandleGet(invoke, codegen_);
3695 }
3696
VisitVarHandleGetVolatile(HInvoke * invoke)3697 void IntrinsicLocationsBuilderX86::VisitVarHandleGetVolatile(HInvoke* invoke) {
3698 CreateVarHandleGetLocations(invoke);
3699 }
3700
VisitVarHandleGetVolatile(HInvoke * invoke)3701 void IntrinsicCodeGeneratorX86::VisitVarHandleGetVolatile(HInvoke* invoke) {
3702 GenerateVarHandleGet(invoke, codegen_);
3703 }
3704
VisitVarHandleGetAcquire(HInvoke * invoke)3705 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAcquire(HInvoke* invoke) {
3706 CreateVarHandleGetLocations(invoke);
3707 }
3708
VisitVarHandleGetAcquire(HInvoke * invoke)3709 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAcquire(HInvoke* invoke) {
3710 GenerateVarHandleGet(invoke, codegen_);
3711 }
3712
VisitVarHandleGetOpaque(HInvoke * invoke)3713 void IntrinsicLocationsBuilderX86::VisitVarHandleGetOpaque(HInvoke* invoke) {
3714 CreateVarHandleGetLocations(invoke);
3715 }
3716
VisitVarHandleGetOpaque(HInvoke * invoke)3717 void IntrinsicCodeGeneratorX86::VisitVarHandleGetOpaque(HInvoke* invoke) {
3718 GenerateVarHandleGet(invoke, codegen_);
3719 }
3720
CreateVarHandleSetLocations(HInvoke * invoke)3721 static void CreateVarHandleSetLocations(HInvoke* invoke) {
3722 // The only read barrier implementation supporting the
3723   // VarHandleSet intrinsic is the Baker-style read barriers.
3724 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
3725 return;
3726 }
3727
3728 if (!IsValidFieldVarHandleExpected(invoke)) {
3729 return;
3730 }
3731
3732 // The last argument should be the value we intend to set.
3733 uint32_t value_index = invoke->GetNumberOfArguments() - 1;
3734 HInstruction* value = invoke->InputAt(value_index);
3735 DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
3736 bool needs_atomicity = invoke->GetIntrinsic() != Intrinsics::kVarHandleSet;
3737 if (value_type == DataType::Type::kInt64 && (!value->IsConstant() || needs_atomicity)) {
3738 // We avoid the case of a non-constant (or volatile) Int64 value because we would need to
3739 // place it in a register pair. If the slow path is taken, the ParallelMove might fail to move
3740 // the pair according to the X86DexCallingConvention in case of an overlap (e.g., move the
3741 // int64 value from <EAX, EBX> to <EBX, ECX>). (Bug: b/168687887)
3742 return;
3743 }
3744
3745 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
3746 LocationSummary* locations = new (allocator) LocationSummary(
3747 invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
3748 locations->SetInAt(0, Location::RequiresRegister());
3749 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3750 if (expected_coordinates_count == 1u) {
3751 // For instance fields, this is the source object
3752 locations->SetInAt(1, Location::RequiresRegister());
3753 }
3754
3755 switch (value_type) {
3756 case DataType::Type::kBool:
3757 case DataType::Type::kInt8:
3758 case DataType::Type::kUint8:
3759 // Ensure the value is in a byte register
3760 locations->SetInAt(value_index, Location::ByteRegisterOrConstant(EBX, value));
3761 break;
3762 case DataType::Type::kInt16:
3763 case DataType::Type::kUint16:
3764 case DataType::Type::kInt32:
3765 locations->SetInAt(value_index, Location::RegisterOrConstant(value));
3766 break;
3767 case DataType::Type::kInt64:
3768 // We only handle constant non-atomic int64 values.
3769 DCHECK(value->IsConstant());
3770 locations->SetInAt(value_index, Location::ConstantLocation(value->AsConstant()));
3771 break;
3772 case DataType::Type::kReference:
3773 locations->SetInAt(value_index, Location::RequiresRegister());
3774 break;
3775 default:
3776 DCHECK(DataType::IsFloatingPointType(value_type));
3777 if (needs_atomicity && value_type == DataType::Type::kFloat64) {
3778 locations->SetInAt(value_index, Location::RequiresFpuRegister());
3779 } else {
3780 locations->SetInAt(value_index, Location::FpuRegisterOrConstant(value));
3781 }
3782 }
3783
3784 locations->AddTemp(Location::RequiresRegister());
3785   // This temporary register is also used as the card register by MarkGCCard, so make sure it is a byte register.
3786 locations->AddTemp(Location::RegisterLocation(EAX));
3787 if (expected_coordinates_count == 0 && value_type == DataType::Type::kReference) {
3788 // For static reference fields, we need another temporary for the declaring class. We set it
3789 // last because we want to make sure that the first 2 temps are reserved for HandleFieldSet.
3790 locations->AddTemp(Location::RequiresRegister());
3791 }
3792 }
3793
GenerateVarHandleSet(HInvoke * invoke,CodeGeneratorX86 * codegen)3794 static void GenerateVarHandleSet(HInvoke* invoke, CodeGeneratorX86* codegen) {
3795 // The only read barrier implementation supporting the
3796 // VarHandle set intrinsics is the Baker-style read barrier.
3797 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
3798
3799 X86Assembler* assembler = codegen->GetAssembler();
3800 LocationSummary* locations = invoke->GetLocations();
3801 // The value we want to set is the last argument
3802 uint32_t value_index = invoke->GetNumberOfArguments() - 1;
3803 DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
3804 Register temp = locations->GetTemp(0).AsRegister<Register>();
3805 Register temp2 = locations->GetTemp(1).AsRegister<Register>();
3806 SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
3807 codegen->AddSlowPath(slow_path);
3808
3809 GenerateVarHandleCommonChecks(invoke, temp, slow_path, assembler);
3810
3811 // For static reference fields, we need another temporary for the declaring class. But since
3812 // for instance fields the object is in a separate register, it is safe to use the first
3813 // temporary register for GenerateVarHandleFieldReference.
3814 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3815 if (value_type == DataType::Type::kReference && expected_coordinates_count == 0) {
3816 temp = locations->GetTemp(2).AsRegister<Register>();
3817 }
3818
3819 Register offset = temp2;
3820 // Get the field referenced by the VarHandle. The returned register contains the object reference
3821 // or the declaring class. The field offset will be placed in 'offset'. For static fields, the
3822 // declaring class will be placed in 'temp' register.
3823 Register reference = GenerateVarHandleFieldReference(invoke, codegen, temp, offset);
3824
3825 bool is_volatile = false;
3826 switch (invoke->GetIntrinsic()) {
3827 case Intrinsics::kVarHandleSet:
3828 case Intrinsics::kVarHandleSetOpaque:
3829 // The only constraint for setOpaque is bitwise atomicity (atomically setting 64-bit
3830 // values), but we do not handle Int64 values here because they would need a register
3831 // pair. If the slow path is taken, the ParallelMove might fail to move the register pair
3832 // in case of an overlap (e.g., move from <EAX, EBX> to <EBX, ECX>). (Bug: b/168687887)
3833 break;
3834 case Intrinsics::kVarHandleSetRelease:
3835 // setRelease needs to ensure atomicity too. See the above comment.
3836 codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
3837 break;
3838 case Intrinsics::kVarHandleSetVolatile:
3839 is_volatile = true;
3840 break;
3841 default:
3842 LOG(FATAL) << "GenerateVarHandleSet received non-set intrinsic " << invoke->GetIntrinsic();
3843 }
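// Added note on ordering (general x86 behaviour, not specific to this file): the kAnyStore
// barrier emitted for setRelease typically costs no instruction on x86 because TSO already
// orders prior memory accesses before the store; for setVolatile, the trailing StoreLoad
// barrier is expected to be emitted by HandleFieldSet below when is_volatile is true.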
3844
3845 InstructionCodeGeneratorX86* instr_codegen =
3846 down_cast<InstructionCodeGeneratorX86*>(codegen->GetInstructionVisitor());
3847 // Store the value to the field
3848 instr_codegen->HandleFieldSet(invoke,
3849 value_index,
3850 value_type,
3851 Address(reference, offset, TIMES_1, 0),
3852 reference,
3853 is_volatile,
3854 /* value_can_be_null */ true);
3855
3856 __ Bind(slow_path->GetExitLabel());
3857 }
3858
3859 void IntrinsicLocationsBuilderX86::VisitVarHandleSet(HInvoke* invoke) {
3860 CreateVarHandleSetLocations(invoke);
3861 }
3862
3863 void IntrinsicCodeGeneratorX86::VisitVarHandleSet(HInvoke* invoke) {
3864 GenerateVarHandleSet(invoke, codegen_);
3865 }
3866
3867 void IntrinsicLocationsBuilderX86::VisitVarHandleSetVolatile(HInvoke* invoke) {
3868 CreateVarHandleSetLocations(invoke);
3869 }
3870
3871 void IntrinsicCodeGeneratorX86::VisitVarHandleSetVolatile(HInvoke* invoke) {
3872 GenerateVarHandleSet(invoke, codegen_);
3873 }
3874
3875 void IntrinsicLocationsBuilderX86::VisitVarHandleSetRelease(HInvoke* invoke) {
3876 CreateVarHandleSetLocations(invoke);
3877 }
3878
3879 void IntrinsicCodeGeneratorX86::VisitVarHandleSetRelease(HInvoke* invoke) {
3880 GenerateVarHandleSet(invoke, codegen_);
3881 }
3882
3883 void IntrinsicLocationsBuilderX86::VisitVarHandleSetOpaque(HInvoke* invoke) {
3884 CreateVarHandleSetLocations(invoke);
3885 }
3886
3887 void IntrinsicCodeGeneratorX86::VisitVarHandleSetOpaque(HInvoke* invoke) {
3888 GenerateVarHandleSet(invoke, codegen_);
3889 }
3890
3891 static void CreateVarHandleGetAndSetLocations(HInvoke* invoke) {
3892 // The only read barrier implementation supporting the
3893 // VarHandle getAndSet intrinsics is the Baker-style read barrier.
3894 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
3895 return;
3896 }
3897
3898 if (!IsValidFieldVarHandleExpected(invoke)) {
3899 return;
3900 }
3901
3902 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
3903 uint32_t value_index = number_of_arguments - 1;
3904 DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
3905
3906 if (DataType::Is64BitType(value_type)) {
3907 // We avoid the case of an Int64/Float64 value because we would need to place it in a register
3908 // pair. If the slow path is taken, the ParallelMove might fail to move the pair according to
3909 // the X86DexCallingConvention in case of an overlap (e.g., move the 64 bit value from
3910 // <EAX, EBX> to <EBX, ECX>).
3911 return;
3912 }
3913
3914 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
3915 LocationSummary* locations = new (allocator) LocationSummary(
3916 invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
3917 locations->AddTemp(Location::RequiresRegister());
3918 locations->AddTemp(Location::RequiresRegister());
3919 // We use this temporary for the card, so we need a byte register
3920 locations->AddTemp(Location::RegisterLocation(EBX));
3921 locations->SetInAt(0, Location::RequiresRegister());
3922 if (GetExpectedVarHandleCoordinatesCount(invoke) == 1u) {
3923 // For instance fields, this is the source object
3924 locations->SetInAt(1, Location::RequiresRegister());
3925 } else {
3926 // For static fields, we need another temp because one will be busy with the declaring class.
3927 locations->AddTemp(Location::RequiresRegister());
3928 }
3929 if (value_type == DataType::Type::kFloat32) {
3930 locations->AddTemp(Location::RegisterLocation(EAX));
3931 locations->SetInAt(value_index, Location::FpuRegisterOrConstant(invoke->InputAt(value_index)));
3932 locations->SetOut(Location::RequiresFpuRegister());
3933 } else {
3934 locations->SetInAt(value_index, Location::RegisterLocation(EAX));
3935 locations->SetOut(Location::RegisterLocation(EAX));
3936 }
3937 }
3938
3939 static void GenerateVarHandleGetAndSet(HInvoke* invoke, CodeGeneratorX86* codegen) {
3940 // The only read barrier implementation supporting the
3941 // VarHandle getAndSet intrinsics is the Baker-style read barrier.
3942 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
3943
3944 X86Assembler* assembler = codegen->GetAssembler();
3945 LocationSummary* locations = invoke->GetLocations();
3946 // The value we want to set is the last argument
3947 uint32_t value_index = invoke->GetNumberOfArguments() - 1;
3948 Location value = locations->InAt(value_index);
3949 DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
3950 Register temp = locations->GetTemp(1).AsRegister<Register>();
3951 Register temp2 = locations->GetTemp(2).AsRegister<Register>();
3952 SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
3953 codegen->AddSlowPath(slow_path);
3954
3955 GenerateVarHandleCommonChecks(invoke, temp, slow_path, assembler);
3956
3957 Register offset = locations->GetTemp(0).AsRegister<Register>();
3958 // Get the field referenced by the VarHandle. The returned register contains the object reference
3959 // or the declaring class. The field offset will be placed in 'offset'. For static fields, the
3960 // declaring class will be placed in 'temp' register.
3961 Register reference = GenerateVarHandleFieldReference(invoke, codegen, temp, offset);
3962 Address field_addr(reference, offset, TIMES_1, 0);
3963
3964 if (invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndSetRelease) {
3965 codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
3966 }
3967
3968 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3969 // For static fields, we need another temporary for the declaring class. But since for instance
3970 // fields the object is in a separate register, it is safe to use the first temporary register.
3971 temp = expected_coordinates_count == 1u ? temp : locations->GetTemp(3).AsRegister<Register>();
3972 // No need for a lock prefix. `xchg` has an implicit lock when used with a memory operand.
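// Added example: for a kInt32 field the code below reduces to a single
//   xchgl value_reg, [reference + offset]
// which the CPU executes with an implicit LOCK, so it is both atomic and a full memory fence.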
3973 switch (value_type) {
3974 case DataType::Type::kBool:
3975 __ xchgb(value.AsRegister<ByteRegister>(), field_addr);
3976 __ movzxb(locations->Out().AsRegister<Register>(),
3977 locations->Out().AsRegister<ByteRegister>());
3978 break;
3979 case DataType::Type::kInt8:
3980 __ xchgb(value.AsRegister<ByteRegister>(), field_addr);
3981 __ movsxb(locations->Out().AsRegister<Register>(),
3982 locations->Out().AsRegister<ByteRegister>());
3983 break;
3984 case DataType::Type::kUint16:
3985 __ xchgw(value.AsRegister<Register>(), field_addr);
3986 __ movzxw(locations->Out().AsRegister<Register>(), locations->Out().AsRegister<Register>());
3987 break;
3988 case DataType::Type::kInt16:
3989 __ xchgw(value.AsRegister<Register>(), field_addr);
3990 __ movsxw(locations->Out().AsRegister<Register>(), locations->Out().AsRegister<Register>());
3991 break;
3992 case DataType::Type::kInt32:
3993 __ xchgl(value.AsRegister<Register>(), field_addr);
3994 break;
3995 case DataType::Type::kFloat32:
3996 codegen->Move32(Location::RegisterLocation(EAX), value);
3997 __ xchgl(EAX, field_addr);
3998 __ movd(locations->Out().AsFpuRegister<XmmRegister>(), EAX);
3999 break;
4000 case DataType::Type::kReference: {
4001 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
4002 // Need to make sure the reference stored in the field is a to-space
4003 // one before attempting the CAS or the CAS could fail incorrectly.
4004 codegen->GenerateReferenceLoadWithBakerReadBarrier(
4005 invoke,
4006 // Unused, used only as a "temporary" within the read barrier.
4007 Location::RegisterLocation(temp),
4008 reference,
4009 field_addr,
4010 /* needs_null_check= */ false,
4011 /* always_update_field= */ true,
4012 &temp2);
4013 }
4014 codegen->MarkGCCard(
4015 temp, temp2, reference, value.AsRegister<Register>(), /* value_can_be_null= */ false);
4016 if (kPoisonHeapReferences) {
4017 __ movl(temp, value.AsRegister<Register>());
4018 __ PoisonHeapReference(temp);
4019 __ xchgl(temp, field_addr);
4020 __ UnpoisonHeapReference(temp);
4021 __ movl(locations->Out().AsRegister<Register>(), temp);
4022 } else {
4023 __ xchgl(locations->Out().AsRegister<Register>(), field_addr);
4024 }
4025 break;
4026 }
4027 default:
4028 UNREACHABLE();
4029 }
4030
4031 if (invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndSetAcquire) {
4032 codegen->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4033 }
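// Added note: because a memory xchg already acts as a full fence on x86, the kLoadAny barrier
// for getAndSetAcquire normally compiles down to a compiler-only barrier rather than extra code.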
4034
4035 __ Bind(slow_path->GetExitLabel());
4036 }
4037
4038 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndSet(HInvoke* invoke) {
4039 CreateVarHandleGetAndSetLocations(invoke);
4040 }
4041
4042 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndSet(HInvoke* invoke) {
4043 GenerateVarHandleGetAndSet(invoke, codegen_);
4044 }
4045
4046 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
4047 CreateVarHandleGetAndSetLocations(invoke);
4048 }
4049
4050 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
4051 GenerateVarHandleGetAndSet(invoke, codegen_);
4052 }
4053
4054 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
4055 CreateVarHandleGetAndSetLocations(invoke);
4056 }
4057
4058 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
4059 GenerateVarHandleGetAndSet(invoke, codegen_);
4060 }
4061
4062 static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke) {
4063 // The only read barrier implementation supporting the
4064 // VarHandle compareAndSet/compareAndExchange intrinsics is the Baker-style read barrier.
4065 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
4066 return;
4067 }
4068
4069 if (!IsValidFieldVarHandleExpected(invoke)) {
4070 return;
4071 }
4072
4073 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4074 uint32_t expected_value_index = number_of_arguments - 2;
4075 uint32_t new_value_index = number_of_arguments - 1;
4076 DataType::Type value_type = GetDataTypeFromShorty(invoke, expected_value_index);
4077 DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, new_value_index));
4078
4079 if (DataType::Is64BitType(value_type)) {
4080 // We avoid the case of an Int64/Float64 value because we would need to place it in a register
4081 // pair. If the slow path is taken, the ParallelMove might fail to move the pair according to
4082 // the X86DexCallingConvention in case of an overlap (e.g., move the 64 bit value from
4083 // <EAX, EBX> to <EBX, ECX>).
4084 return;
4085 }
4086
4087 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4088 LocationSummary* locations = new (allocator) LocationSummary(
4089 invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
4090 locations->AddTemp(Location::RequiresRegister());
4091 locations->AddTemp(Location::RequiresRegister());
4092 // We use this temporary for the card, so we need a byte register
4093 locations->AddTemp(Location::RegisterLocation(EBX));
4094 locations->SetInAt(0, Location::RequiresRegister());
4095 if (GetExpectedVarHandleCoordinatesCount(invoke) == 1u) {
4096 // For instance fields, this is the source object
4097 locations->SetInAt(1, Location::RequiresRegister());
4098 } else {
4099 // For static fields, we need another temp because one will be busy with the declaring class.
4100 locations->AddTemp(Location::RequiresRegister());
4101 }
4102 if (DataType::IsFloatingPointType(value_type)) {
4103 // We need EAX for placing the expected value
4104 locations->AddTemp(Location::RegisterLocation(EAX));
4105 locations->SetInAt(new_value_index,
4106 Location::FpuRegisterOrConstant(invoke->InputAt(new_value_index)));
4107 locations->SetInAt(expected_value_index,
4108 Location::FpuRegisterOrConstant(invoke->InputAt(expected_value_index)));
4109 } else {
4110 // Ensure it's in a byte register
4111 locations->SetInAt(new_value_index, Location::RegisterLocation(ECX));
4112 locations->SetInAt(expected_value_index, Location::RegisterLocation(EAX));
4113 }
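// Added note: the fixed registers above mirror the cmpxchg contract used below: the expected
// value must be in EAX, and the 8-bit form (cmpxchgb) needs a byte-addressable new-value
// register, which ECX provides through CL.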
4114
4115 mirror::VarHandle::AccessModeTemplate access_mode_template =
4116 mirror::VarHandle::GetAccessModeTemplateByIntrinsic(invoke->GetIntrinsic());
4117
4118 if (access_mode_template == mirror::VarHandle::AccessModeTemplate::kCompareAndExchange &&
4119 value_type == DataType::Type::kFloat32) {
4120 locations->SetOut(Location::RequiresFpuRegister());
4121 } else {
4122 locations->SetOut(Location::RegisterLocation(EAX));
4123 }
4124 }
4125
4126 static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke, CodeGeneratorX86* codegen) {
4127 // The only read barrier implementation supporting the
4128 // VarHandle compareAndSet/compareAndExchange intrinsics is the Baker-style read barrier.
4129 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
4130
4131 X86Assembler* assembler = codegen->GetAssembler();
4132 LocationSummary* locations = invoke->GetLocations();
4133 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4134 uint32_t expected_value_index = number_of_arguments - 2;
4135 uint32_t new_value_index = number_of_arguments - 1;
4136 DataType::Type type = GetDataTypeFromShorty(invoke, expected_value_index);
4137 DCHECK_EQ(type, GetDataTypeFromShorty(invoke, new_value_index));
4138 Location expected_value = locations->InAt(expected_value_index);
4139 Location new_value = locations->InAt(new_value_index);
4140 Register offset = locations->GetTemp(0).AsRegister<Register>();
4141 Register temp = locations->GetTemp(1).AsRegister<Register>();
4142 Register temp2 = locations->GetTemp(2).AsRegister<Register>();
4143 SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
4144 codegen->AddSlowPath(slow_path);
4145
4146 GenerateVarHandleCommonChecks(invoke, temp, slow_path, assembler);
4147
4148 // Get the field referenced by the VarHandle. The returned register contains the object reference
4149 // or the declaring class. The field offset will be placed in 'offset'. For static fields, the
4150 // declaring class will be placed in 'temp' register.
4151 Register reference = GenerateVarHandleFieldReference(invoke, codegen, temp, offset);
4152
4153 uint32_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4154 // For generating the compare and exchange, we need 2 temporaries. In case of a static field, the
4155 // first temporary contains the declaring class so we need another temporary. In case of an
4156 // instance field, the object comes in a separate register so it's safe to use the first temp.
4157 temp = (expected_coordinates_count == 1u) ? temp : locations->GetTemp(3).AsRegister<Register>();
4158 DCHECK_NE(temp, reference);
4159
4160 // We are using `lock cmpxchg` in all cases because there is no CAS equivalent that has weak
4161 // failure semantics. `lock cmpxchg` has full barrier semantics, and we don't need scheduling
4162 // barriers at this time.
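// Added summary of the `lock cmpxchg` contract relied upon here (standard x86 semantics):
//   - EAX holds the expected value;
//   - on success, ZF is set and the new value is stored to memory;
//   - on failure, ZF is cleared and EAX is loaded with the value actually found in memory.
// compareAndSet variants only consume the ZF outcome, while compareAndExchange variants also
// return the witness value left in EAX (moved to an FP register for the float32 case).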
4163
4164 mirror::VarHandle::AccessModeTemplate access_mode_template =
4165 mirror::VarHandle::GetAccessModeTemplateByIntrinsic(invoke->GetIntrinsic());
4166 bool is_cmpxchg =
4167 access_mode_template == mirror::VarHandle::AccessModeTemplate::kCompareAndExchange;
4168
4169 if (type == DataType::Type::kReference) {
4170 GenReferenceCAS(
4171 invoke, codegen, expected_value, new_value, reference, offset, temp, temp2, is_cmpxchg);
4172 } else {
4173 Location out = locations->Out();
4174 GenPrimitiveCAS(
4175 type, codegen, expected_value, new_value, reference, offset, out, temp, is_cmpxchg);
4176 }
4177
4178 __ Bind(slow_path->GetExitLabel());
4179 }
4180
4181 void IntrinsicLocationsBuilderX86::VisitVarHandleCompareAndSet(HInvoke* invoke) {
4182 CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
4183 }
4184
4185 void IntrinsicCodeGeneratorX86::VisitVarHandleCompareAndSet(HInvoke* invoke) {
4186 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4187 }
4188
4189 void IntrinsicLocationsBuilderX86::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
4190 CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
4191 }
4192
4193 void IntrinsicCodeGeneratorX86::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
4194 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4195 }
4196
4197 void IntrinsicLocationsBuilderX86::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
4198 CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
4199 }
4200
4201 void IntrinsicCodeGeneratorX86::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
4202 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4203 }
4204
4205 void IntrinsicLocationsBuilderX86::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
4206 CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
4207 }
4208
4209 void IntrinsicCodeGeneratorX86::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
4210 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4211 }
4212
4213 void IntrinsicLocationsBuilderX86::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
4214 CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
4215 }
4216
4217 void IntrinsicCodeGeneratorX86::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
4218 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4219 }
4220
4221 void IntrinsicLocationsBuilderX86::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
4222 CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
4223 }
4224
4225 void IntrinsicCodeGeneratorX86::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
4226 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4227 }
4228
4229 void IntrinsicLocationsBuilderX86::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
4230 CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
4231 }
4232
4233 void IntrinsicCodeGeneratorX86::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
4234 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4235 }
4236
4237 void IntrinsicLocationsBuilderX86::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
4238 CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
4239 }
4240
4241 void IntrinsicCodeGeneratorX86::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
4242 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4243 }
4244
4245 static void CreateVarHandleGetAndAddLocations(HInvoke* invoke) {
4246 // The only read barrier implementation supporting the
4247 // VarHandle getAndAdd intrinsics is the Baker-style read barrier.
4248 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
4249 return;
4250 }
4251
4252 if (!IsValidFieldVarHandleExpected(invoke)) {
4253 return;
4254 }
4255
4256 // The last argument should be the value we intend to set.
4257 uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4258 DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4259 if (DataType::Is64BitType(value_type)) {
4260 // We avoid the case of an Int64/Float64 value because we would need to place it in a register
4261 // pair. If the slow path is taken, the ParallelMove might fail to move the pair according to
4262 // the X86DexCallingConvention in case of an overlap (e.g., move the 64 bit value from
4263 // <EAX, EBX> to <EBX, ECX>). (Bug: b/168687887)
4264 return;
4265 }
4266
4267 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4268 LocationSummary* locations = new (allocator) LocationSummary(
4269 invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
4270 locations->AddTemp(Location::RequiresRegister());
4271 locations->AddTemp(Location::RequiresRegister());
4272 locations->SetInAt(0, Location::RequiresRegister());
4273 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4274 if (expected_coordinates_count == 1u) {
4275 // For instance fields, this is the source object
4276 locations->SetInAt(1, Location::RequiresRegister());
4277 } else {
4278 // For static fields, we need another temp because one will be busy with the declaring class.
4279 locations->AddTemp(Location::RequiresRegister());
4280 }
4281
4282 if (DataType::IsFloatingPointType(value_type)) {
4283 locations->AddTemp(Location::RequiresFpuRegister());
4284 locations->AddTemp(Location::RegisterLocation(EAX));
4285 locations->SetInAt(value_index, Location::RequiresFpuRegister());
4286 locations->SetOut(Location::RequiresFpuRegister());
4287 } else {
4288 // xadd updates the register argument with the old value. A byte register is required for xaddb.
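// Added illustration: `lock xaddl %reg, [field]` atomically stores old_field + reg to the
// field and leaves the previous field value in the register, so pinning the value and the
// output to EAX yields getAndAdd in a single instruction.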
4289 locations->SetInAt(value_index, Location::RegisterLocation(EAX));
4290 locations->SetOut(Location::RegisterLocation(EAX));
4291 }
4292 }
4293
4294 static void GenerateVarHandleGetAndAdd(HInvoke* invoke, CodeGeneratorX86* codegen) {
4295 // The only read barrier implementation supporting the
4296 // VarHandle getAndAdd intrinsics is the Baker-style read barrier.
4297 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
4298
4299 X86Assembler* assembler = codegen->GetAssembler();
4300 LocationSummary* locations = invoke->GetLocations();
4301 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4302 uint32_t value_index = number_of_arguments - 1;
4303 DataType::Type type = GetDataTypeFromShorty(invoke, value_index);
4304 DCHECK_EQ(type, invoke->GetType());
4305 Location value_loc = locations->InAt(value_index);
4306 Register temp = locations->GetTemp(0).AsRegister<Register>();
4307 SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
4308 codegen->AddSlowPath(slow_path);
4309
4310 GenerateVarHandleCommonChecks(invoke, temp, slow_path, assembler);
4311
4312 Register offset = locations->GetTemp(1).AsRegister<Register>();
4313 // Get the field referenced by the VarHandle. The returned register contains the object reference
4314 // or the declaring class. The field offset will be placed in 'offset'. For static fields, the
4315 // declaring class will be placed in 'temp' register.
4316 Register reference = GenerateVarHandleFieldReference(invoke, codegen, temp, offset);
4317
4318 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4319 temp = (expected_coordinates_count == 1u) ? temp : locations->GetTemp(2).AsRegister<Register>();
4320 DCHECK_NE(temp, reference);
4321 Address field_addr(reference, offset, TIMES_1, 0);
4322
4323 switch (type) {
4324 case DataType::Type::kInt8:
4325 __ LockXaddb(field_addr, value_loc.AsRegister<ByteRegister>());
4326 __ movsxb(locations->Out().AsRegister<Register>(),
4327 locations->Out().AsRegister<ByteRegister>());
4328 break;
4329 case DataType::Type::kInt16:
4330 __ LockXaddw(field_addr, value_loc.AsRegister<Register>());
4331 __ movsxw(locations->Out().AsRegister<Register>(), locations->Out().AsRegister<Register>());
4332 break;
4333 case DataType::Type::kUint16:
4334 __ LockXaddw(field_addr, value_loc.AsRegister<Register>());
4335 __ movzxw(locations->Out().AsRegister<Register>(), locations->Out().AsRegister<Register>());
4336 break;
4337 case DataType::Type::kInt32:
4338 __ LockXaddl(field_addr, value_loc.AsRegister<Register>());
4339 break;
4340 case DataType::Type::kFloat32: {
4341 Location temp_float =
4342 (expected_coordinates_count == 1u) ? locations->GetTemp(2) : locations->GetTemp(3);
4343 DCHECK(temp_float.IsFpuRegister());
4344 Location eax = Location::RegisterLocation(EAX);
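// Added note: x86 has no atomic floating-point add, so this is a load / addss / lock-cmpxchg
// retry loop: the current field bits become the expected value in EAX, the sum is built in
// temp_float, and the cmpxchg below publishes it only if the field has not changed in between.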
4345 NearLabel try_again;
4346 __ Bind(&try_again);
4347 __ movss(temp_float.AsFpuRegister<XmmRegister>(), field_addr);
4348 __ movd(EAX, temp_float.AsFpuRegister<XmmRegister>());
4349 __ addss(temp_float.AsFpuRegister<XmmRegister>(),
4350 value_loc.AsFpuRegister<XmmRegister>());
4351 GenPrimitiveLockedCmpxchg(type,
4352 codegen,
4353 /* expected_value= */ eax,
4354 /* new_value= */ temp_float,
4355 reference,
4356 offset,
4357 temp);
4358 __ j(kNotZero, &try_again);
4359
4360 // The old value is present in EAX.
4361 codegen->Move32(locations->Out(), eax);
4362 break;
4363 }
4364 default:
4365 UNREACHABLE();
4366 }
4367
4368 __ Bind(slow_path->GetExitLabel());
4369 }
4370
4371 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndAdd(HInvoke* invoke) {
4372 CreateVarHandleGetAndAddLocations(invoke);
4373 }
4374
4375 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndAdd(HInvoke* invoke) {
4376 GenerateVarHandleGetAndAdd(invoke, codegen_);
4377 }
4378
4379 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
4380 CreateVarHandleGetAndAddLocations(invoke);
4381 }
4382
4383 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
4384 GenerateVarHandleGetAndAdd(invoke, codegen_);
4385 }
4386
4387 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
4388 CreateVarHandleGetAndAddLocations(invoke);
4389 }
4390
4391 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
4392 GenerateVarHandleGetAndAdd(invoke, codegen_);
4393 }
4394
4395 static void CreateVarHandleGetAndBitwiseOpLocations(HInvoke* invoke) {
4396 // The only read barrier implementation supporting the
4397 // VarHandle getAndBitwise intrinsics is the Baker-style read barrier.
4398 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
4399 return;
4400 }
4401
4402 if (!IsValidFieldVarHandleExpected(invoke)) {
4403 return;
4404 }
4405
4406 // The last argument should be the value we intend to set.
4407 uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4408 if (DataType::Is64BitType(GetDataTypeFromShorty(invoke, value_index))) {
4409 // We avoid the case of an Int64 value because we would need to place it in a register pair.
4410 // If the slow path is taken, the ParallelMove might fail to move the pair according to the
4411 // X86DexCallingConvention in case of an overlap (e.g., move the 64 bit value from
4412 // <EAX, EBX> to <EBX, ECX>). (Bug: b/168687887)
4413 return;
4414 }
4415
4416 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4417 LocationSummary* locations = new (allocator) LocationSummary(
4418 invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
4419 // We need a byte register temp to store the result of the bitwise operation
4420 locations->AddTemp(Location::RegisterLocation(EBX));
4421 locations->AddTemp(Location::RequiresRegister());
4422 locations->SetInAt(0, Location::RequiresRegister());
4423 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4424 if (expected_coordinates_count == 1u) {
4425 // For instance fields, this is the source object
4426 locations->SetInAt(1, Location::RequiresRegister());
4427 } else {
4428 // For static fields, we need another temp because one will be busy with the declaring class.
4429 locations->AddTemp(Location::RequiresRegister());
4430 }
4431
4432 locations->SetInAt(value_index, Location::RegisterOrConstant(invoke->InputAt(value_index)));
4433 locations->SetOut(Location::RegisterLocation(EAX));
4434 }
4435
4436 static void GenerateBitwiseOp(HInvoke* invoke,
4437 CodeGeneratorX86* codegen,
4438 Register left,
4439 Register right) {
4440 X86Assembler* assembler = codegen->GetAssembler();
4441
4442 switch (invoke->GetIntrinsic()) {
4443 case Intrinsics::kVarHandleGetAndBitwiseOr:
4444 case Intrinsics::kVarHandleGetAndBitwiseOrAcquire:
4445 case Intrinsics::kVarHandleGetAndBitwiseOrRelease:
4446 __ orl(left, right);
4447 break;
4448 case Intrinsics::kVarHandleGetAndBitwiseXor:
4449 case Intrinsics::kVarHandleGetAndBitwiseXorAcquire:
4450 case Intrinsics::kVarHandleGetAndBitwiseXorRelease:
4451 __ xorl(left, right);
4452 break;
4453 case Intrinsics::kVarHandleGetAndBitwiseAnd:
4454 case Intrinsics::kVarHandleGetAndBitwiseAndAcquire:
4455 case Intrinsics::kVarHandleGetAndBitwiseAndRelease:
4456 __ andl(left, right);
4457 break;
4458 default:
4459 UNREACHABLE();
4460 }
4461 }
4462
4463 static void GenerateVarHandleGetAndBitwiseOp(HInvoke* invoke, CodeGeneratorX86* codegen) {
4464 // The only read barrier implementation supporting the
4465 // VarHandle getAndBitwise intrinsics is the Baker-style read barrier.
4466 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
4467
4468 X86Assembler* assembler = codegen->GetAssembler();
4469 LocationSummary* locations = invoke->GetLocations();
4470 uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4471 DataType::Type type = GetDataTypeFromShorty(invoke, value_index);
4472 DCHECK_EQ(type, invoke->GetType());
4473 Register temp = locations->GetTemp(0).AsRegister<Register>();
4474 SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
4475 codegen->AddSlowPath(slow_path);
4476
4477 GenerateVarHandleCommonChecks(invoke, temp, slow_path, assembler);
4478
4479 Register offset = locations->GetTemp(1).AsRegister<Register>();
4480 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4481 // For static fields, we need another temporary because the first one contains the declaring class.
4482 Register reference =
4483 (expected_coordinates_count == 1u) ? temp : locations->GetTemp(2).AsRegister<Register>();
4484 // Get the field referenced by the VarHandle. The returned register contains the object reference
4485 // or the declaring class. The field offset will be placed in 'offset'. For static fields, the
4486 // declaring class will be placed in 'reference' register.
4487 reference = GenerateVarHandleFieldReference(invoke, codegen, reference, offset);
4488 DCHECK_NE(temp, reference);
4489 Address field_addr(reference, offset, TIMES_1, 0);
4490
4491 Register out = locations->Out().AsRegister<Register>();
4492 DCHECK_EQ(out, EAX);
4493
4494 if (invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndBitwiseOrRelease ||
4495 invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndBitwiseXorRelease ||
4496 invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndBitwiseAndRelease) {
4497 codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
4498 }
4499
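// Added summary: this is a cmpxchg retry loop. The current field value is loaded into EAX as
// the expected value, the operand is copied into the EBX temp and combined with the old value
// by GenerateBitwiseOp, and `lock cmpxchg` attempts to publish the result; if another thread
// raced, ZF is clear and the loop retries.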
4500 NearLabel try_again;
4501 __ Bind(&try_again);
4502 // Place the expected value in EAX for cmpxchg
4503 codegen->LoadFromMemoryNoBarrier(type, locations->Out(), field_addr);
4504 codegen->Move32(locations->GetTemp(0), locations->InAt(value_index));
4505 GenerateBitwiseOp(invoke, codegen, temp, out);
4506 GenPrimitiveLockedCmpxchg(type,
4507 codegen,
4508 /* expected_value= */ locations->Out(),
4509 /* new_value= */ locations->GetTemp(0),
4510 reference,
4511 offset);
4512 // If the cmpxchg failed, another thread changed the value so try again.
4513 __ j(kNotZero, &try_again);
4514
4515 // The old value is present in EAX.
4516
4517 if (invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndBitwiseOrAcquire ||
4518 invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndBitwiseXorAcquire ||
4519 invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndBitwiseAndAcquire) {
4520 codegen->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4521 }
4522
4523 __ Bind(slow_path->GetExitLabel());
4524 }
4525
4526 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
4527 CreateVarHandleGetAndBitwiseOpLocations(invoke);
4528 }
4529
4530 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
4531 GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
4532 }
4533
4534 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
4535 CreateVarHandleGetAndBitwiseOpLocations(invoke);
4536 }
4537
4538 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
4539 GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
4540 }
4541
4542 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
4543 CreateVarHandleGetAndBitwiseOpLocations(invoke);
4544 }
4545
4546 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
4547 GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
4548 }
4549
4550 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
4551 CreateVarHandleGetAndBitwiseOpLocations(invoke);
4552 }
4553
4554 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
4555 GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
4556 }
4557
4558 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
4559 CreateVarHandleGetAndBitwiseOpLocations(invoke);
4560 }
4561
4562 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
4563 GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
4564 }
4565
4566 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
4567 CreateVarHandleGetAndBitwiseOpLocations(invoke);
4568 }
4569
4570 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
4571 GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
4572 }
4573
4574 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
4575 CreateVarHandleGetAndBitwiseOpLocations(invoke);
4576 }
4577
4578 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
4579 GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
4580 }
4581
4582 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
4583 CreateVarHandleGetAndBitwiseOpLocations(invoke);
4584 }
4585
4586 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
4587 GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
4588 }
4589
4590 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
4591 CreateVarHandleGetAndBitwiseOpLocations(invoke);
4592 }
4593
4594 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
4595 GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
4596 }
4597
4598 UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble)
4599 UNIMPLEMENTED_INTRINSIC(X86, FloatIsInfinite)
4600 UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite)
4601 UNIMPLEMENTED_INTRINSIC(X86, IntegerHighestOneBit)
4602 UNIMPLEMENTED_INTRINSIC(X86, LongHighestOneBit)
4603 UNIMPLEMENTED_INTRINSIC(X86, LongDivideUnsigned)
4604 UNIMPLEMENTED_INTRINSIC(X86, CRC32Update)
4605 UNIMPLEMENTED_INTRINSIC(X86, CRC32UpdateBytes)
4606 UNIMPLEMENTED_INTRINSIC(X86, CRC32UpdateByteBuffer)
4607 UNIMPLEMENTED_INTRINSIC(X86, FP16ToFloat)
4608 UNIMPLEMENTED_INTRINSIC(X86, FP16ToHalf)
4609 UNIMPLEMENTED_INTRINSIC(X86, FP16Floor)
4610 UNIMPLEMENTED_INTRINSIC(X86, FP16Ceil)
4611 UNIMPLEMENTED_INTRINSIC(X86, FP16Rint)
4612 UNIMPLEMENTED_INTRINSIC(X86, FP16Greater)
4613 UNIMPLEMENTED_INTRINSIC(X86, FP16GreaterEquals)
4614 UNIMPLEMENTED_INTRINSIC(X86, FP16Less)
4615 UNIMPLEMENTED_INTRINSIC(X86, FP16LessEquals)
4616 UNIMPLEMENTED_INTRINSIC(X86, MathMultiplyHigh)
4617
4618 UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOf);
4619 UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOfAfter);
4620 UNIMPLEMENTED_INTRINSIC(X86, StringBufferAppend);
4621 UNIMPLEMENTED_INTRINSIC(X86, StringBufferLength);
4622 UNIMPLEMENTED_INTRINSIC(X86, StringBufferToString);
4623 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendObject);
4624 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendString);
4625 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendCharSequence);
4626 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendCharArray);
4627 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendBoolean);
4628 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendChar);
4629 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendInt);
4630 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendLong);
4631 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendFloat);
4632 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendDouble);
4633 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderLength);
4634 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderToString);
4635
4636 // 1.8.
4637 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddInt)
4638 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddLong)
4639 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetInt)
4640 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetLong)
4641 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetObject)
4642
4643 UNIMPLEMENTED_INTRINSIC(X86, MethodHandleInvokeExact)
4644 UNIMPLEMENTED_INTRINSIC(X86, MethodHandleInvoke)
4645
4646 UNREACHABLE_INTRINSICS(X86)
4647
4648 #undef __
4649
4650 } // namespace x86
4651 } // namespace art
4652