1 /*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "intrinsics_x86.h"
18
19 #include <limits>
20
21 #include "arch/x86/instruction_set_features_x86.h"
22 #include "art_method.h"
23 #include "base/bit_utils.h"
24 #include "code_generator_x86.h"
25 #include "entrypoints/quick/quick_entrypoints.h"
26 #include "heap_poisoning.h"
27 #include "intrinsics.h"
28 #include "intrinsics_utils.h"
29 #include "lock_word.h"
30 #include "mirror/array-inl.h"
31 #include "mirror/object_array-inl.h"
32 #include "mirror/reference.h"
33 #include "mirror/string.h"
34 #include "scoped_thread_state_change-inl.h"
35 #include "thread-current-inl.h"
36 #include "utils/x86/assembler_x86.h"
37 #include "utils/x86/constants_x86.h"
38
39 namespace art {
40
41 namespace x86 {
42
43 IntrinsicLocationsBuilderX86::IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen)
44 : allocator_(codegen->GetGraph()->GetAllocator()),
45 codegen_(codegen) {
46 }
47
48
49 X86Assembler* IntrinsicCodeGeneratorX86::GetAssembler() {
50 return down_cast<X86Assembler*>(codegen_->GetAssembler());
51 }
52
53 ArenaAllocator* IntrinsicCodeGeneratorX86::GetAllocator() {
54 return codegen_->GetGraph()->GetAllocator();
55 }
56
57 bool IntrinsicLocationsBuilderX86::TryDispatch(HInvoke* invoke) {
58 Dispatch(invoke);
59 LocationSummary* res = invoke->GetLocations();
60 if (res == nullptr) {
61 return false;
62 }
63 return res->Intrinsified();
64 }
65
66 static void MoveArguments(HInvoke* invoke, CodeGeneratorX86* codegen) {
67 InvokeDexCallingConventionVisitorX86 calling_convention_visitor;
68 IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
69 }
70
71 using IntrinsicSlowPathX86 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86>;
72
73 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
74 #define __ down_cast<X86Assembler*>(codegen->GetAssembler())-> // NOLINT
75
76 // Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
77 class ReadBarrierSystemArrayCopySlowPathX86 : public SlowPathCode {
78 public:
79 explicit ReadBarrierSystemArrayCopySlowPathX86(HInstruction* instruction)
80 : SlowPathCode(instruction) {
81 DCHECK(kEmitCompilerReadBarrier);
82 DCHECK(kUseBakerReadBarrier);
83 }
84
85 void EmitNativeCode(CodeGenerator* codegen) override {
86 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
87 LocationSummary* locations = instruction_->GetLocations();
88 DCHECK(locations->CanCall());
89 DCHECK(instruction_->IsInvokeStaticOrDirect())
90 << "Unexpected instruction in read barrier arraycopy slow path: "
91 << instruction_->DebugName();
92 DCHECK(instruction_->GetLocations()->Intrinsified());
93 DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
94
95 int32_t element_size = DataType::Size(DataType::Type::kReference);
96 uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
97
98 Register src = locations->InAt(0).AsRegister<Register>();
99 Location src_pos = locations->InAt(1);
100 Register dest = locations->InAt(2).AsRegister<Register>();
101 Location dest_pos = locations->InAt(3);
102 Location length = locations->InAt(4);
103 Location temp1_loc = locations->GetTemp(0);
104 Register temp1 = temp1_loc.AsRegister<Register>();
105 Register temp2 = locations->GetTemp(1).AsRegister<Register>();
106 Register temp3 = locations->GetTemp(2).AsRegister<Register>();
107
108 __ Bind(GetEntryLabel());
109 // In this code path, registers `temp1`, `temp2`, and `temp3`
110 // (resp.) are not used for the base source address, the base
111 // destination address, and the end source address (resp.), as in
112 // other SystemArrayCopy intrinsic code paths. Instead they are
113 // (resp.) used for:
114 // - the loop index (`i`);
115 // - the source index (`src_index`) and the loaded (source)
116 // reference (`value`); and
117 // - the destination index (`dest_index`).
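// In pseudocode, the loop emitted below is roughly:
//   for (i = 0; i != length; ++i) {
//     dest_array[dest_pos + i] = ReadBarrier::Mark(src_array[src_pos + i]);
//   }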
118
119 // i = 0
120 __ xorl(temp1, temp1);
121 NearLabel loop;
122 __ Bind(&loop);
123 // value = src_array[i + src_pos]
124 if (src_pos.IsConstant()) {
125 int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
126 int32_t adjusted_offset = offset + constant * element_size;
127 __ movl(temp2, Address(src, temp1, ScaleFactor::TIMES_4, adjusted_offset));
128 } else {
129 __ leal(temp2, Address(src_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0));
130 __ movl(temp2, Address(src, temp2, ScaleFactor::TIMES_4, offset));
131 }
132 __ MaybeUnpoisonHeapReference(temp2);
133 // TODO: Inline the mark bit check before calling the runtime?
134 // value = ReadBarrier::Mark(value)
135 // No need to save live registers; it's taken care of by the
136 // entrypoint. Also, there is no need to update the stack mask,
137 // as this runtime call will not trigger a garbage collection.
138 // (See ReadBarrierMarkSlowPathX86::EmitNativeCode for more
139 // explanations.)
140 DCHECK_NE(temp2, ESP);
141 DCHECK(0 <= temp2 && temp2 < kNumberOfCpuRegisters) << temp2;
142 int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(temp2);
143 // This runtime call does not require a stack map.
144 x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
145 __ MaybePoisonHeapReference(temp2);
146 // dest_array[i + dest_pos] = value
147 if (dest_pos.IsConstant()) {
148 int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
149 int32_t adjusted_offset = offset + constant * element_size;
150 __ movl(Address(dest, temp1, ScaleFactor::TIMES_4, adjusted_offset), temp2);
151 } else {
152 __ leal(temp3, Address(dest_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0));
153 __ movl(Address(dest, temp3, ScaleFactor::TIMES_4, offset), temp2);
154 }
155 // ++i
156 __ addl(temp1, Immediate(1));
157 // if (i != length) goto loop
158 x86_codegen->GenerateIntCompare(temp1_loc, length);
159 __ j(kNotEqual, &loop);
160 __ jmp(GetExitLabel());
161 }
162
163 const char* GetDescription() const override { return "ReadBarrierSystemArrayCopySlowPathX86"; }
164
165 private:
166 DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86);
167 };
168
169 #undef __
170
171 #define __ assembler->
172
173 static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is64bit) {
174 LocationSummary* locations =
175 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
176 locations->SetInAt(0, Location::RequiresFpuRegister());
177 locations->SetOut(Location::RequiresRegister());
178 if (is64bit) {
179 locations->AddTemp(Location::RequiresFpuRegister());
180 }
181 }
182
183 static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is64bit) {
184 LocationSummary* locations =
185 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
186 locations->SetInAt(0, Location::RequiresRegister());
187 locations->SetOut(Location::RequiresFpuRegister());
188 if (is64bit) {
189 locations->AddTemp(Location::RequiresFpuRegister());
190 locations->AddTemp(Location::RequiresFpuRegister());
191 }
192 }
193
194 static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
195 Location input = locations->InAt(0);
196 Location output = locations->Out();
197 if (is64bit) {
198 // Need to use the temporary.
199 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
200 __ movsd(temp, input.AsFpuRegister<XmmRegister>());
201 __ movd(output.AsRegisterPairLow<Register>(), temp);
202 __ psrlq(temp, Immediate(32));
203 __ movd(output.AsRegisterPairHigh<Register>(), temp);
204 } else {
205 __ movd(output.AsRegister<Register>(), input.AsFpuRegister<XmmRegister>());
206 }
207 }
208
209 static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
210 Location input = locations->InAt(0);
211 Location output = locations->Out();
212 if (is64bit) {
213 // Need to use the temporary.
214 XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
215 XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
216 __ movd(temp1, input.AsRegisterPairLow<Register>());
217 __ movd(temp2, input.AsRegisterPairHigh<Register>());
218 __ punpckldq(temp1, temp2);
219 __ movsd(output.AsFpuRegister<XmmRegister>(), temp1);
220 } else {
221 __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<Register>());
222 }
223 }
224
225 void IntrinsicLocationsBuilderX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
226 CreateFPToIntLocations(allocator_, invoke, /* is64bit= */ true);
227 }
228 void IntrinsicLocationsBuilderX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
229 CreateIntToFPLocations(allocator_, invoke, /* is64bit= */ true);
230 }
231
232 void IntrinsicCodeGeneratorX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
233 MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
234 }
235 void IntrinsicCodeGeneratorX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
236 MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
237 }
238
239 void IntrinsicLocationsBuilderX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
240 CreateFPToIntLocations(allocator_, invoke, /* is64bit= */ false);
241 }
242 void IntrinsicLocationsBuilderX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
243 CreateIntToFPLocations(allocator_, invoke, /* is64bit= */ false);
244 }
245
246 void IntrinsicCodeGeneratorX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
247 MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
248 }
249 void IntrinsicCodeGeneratorX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
250 MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
251 }
252
253 static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
254 LocationSummary* locations =
255 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
256 locations->SetInAt(0, Location::RequiresRegister());
257 locations->SetOut(Location::SameAsFirstInput());
258 }
259
260 static void CreateLongToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
261 LocationSummary* locations =
262 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
263 locations->SetInAt(0, Location::RequiresRegister());
264 locations->SetOut(Location::RequiresRegister());
265 }
266
267 static void CreateLongToLongLocations(ArenaAllocator* allocator, HInvoke* invoke) {
268 LocationSummary* locations =
269 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
270 locations->SetInAt(0, Location::RequiresRegister());
271 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
272 }
273
274 static void GenReverseBytes(LocationSummary* locations,
275 DataType::Type size,
276 X86Assembler* assembler) {
277 Register out = locations->Out().AsRegister<Register>();
278
279 switch (size) {
280 case DataType::Type::kInt16:
281 // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
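// For example, 0x00001234 -> bswapl -> 0x34120000 -> sarl 16 -> 0x00003412,
// with the arithmetic shift providing the sign extension expected for a short.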
282 __ bswapl(out);
283 __ sarl(out, Immediate(16));
284 break;
285 case DataType::Type::kInt32:
286 __ bswapl(out);
287 break;
288 default:
289 LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
290 UNREACHABLE();
291 }
292 }
293
294 void IntrinsicLocationsBuilderX86::VisitIntegerReverseBytes(HInvoke* invoke) {
295 CreateIntToIntLocations(allocator_, invoke);
296 }
297
298 void IntrinsicCodeGeneratorX86::VisitIntegerReverseBytes(HInvoke* invoke) {
299 GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
300 }
301
302 void IntrinsicLocationsBuilderX86::VisitLongReverseBytes(HInvoke* invoke) {
303 CreateLongToLongLocations(allocator_, invoke);
304 }
305
306 void IntrinsicCodeGeneratorX86::VisitLongReverseBytes(HInvoke* invoke) {
307 LocationSummary* locations = invoke->GetLocations();
308 Location input = locations->InAt(0);
309 Register input_lo = input.AsRegisterPairLow<Register>();
310 Register input_hi = input.AsRegisterPairHigh<Register>();
311 Location output = locations->Out();
312 Register output_lo = output.AsRegisterPairLow<Register>();
313 Register output_hi = output.AsRegisterPairHigh<Register>();
314
315 X86Assembler* assembler = GetAssembler();
316 // Assign the inputs to the outputs, mixing low/high.
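// For example, the long 0x1122334455667788 (hi = 0x11223344, lo = 0x55667788) becomes
// 0x8877665544332211: each half is byte-swapped and the halves trade places.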
317 __ movl(output_lo, input_hi);
318 __ movl(output_hi, input_lo);
319 __ bswapl(output_lo);
320 __ bswapl(output_hi);
321 }
322
323 void IntrinsicLocationsBuilderX86::VisitShortReverseBytes(HInvoke* invoke) {
324 CreateIntToIntLocations(allocator_, invoke);
325 }
326
327 void IntrinsicCodeGeneratorX86::VisitShortReverseBytes(HInvoke* invoke) {
328 GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
329 }
330
331 static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
332 LocationSummary* locations =
333 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
334 locations->SetInAt(0, Location::RequiresFpuRegister());
335 locations->SetOut(Location::RequiresFpuRegister());
336 }
337
338 void IntrinsicLocationsBuilderX86::VisitMathSqrt(HInvoke* invoke) {
339 CreateFPToFPLocations(allocator_, invoke);
340 }
341
342 void IntrinsicCodeGeneratorX86::VisitMathSqrt(HInvoke* invoke) {
343 LocationSummary* locations = invoke->GetLocations();
344 XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
345 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
346
347 GetAssembler()->sqrtsd(out, in);
348 }
349
350 static void InvokeOutOfLineIntrinsic(CodeGeneratorX86* codegen, HInvoke* invoke) {
351 MoveArguments(invoke, codegen);
352
353 DCHECK(invoke->IsInvokeStaticOrDirect());
354 codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(),
355 Location::RegisterLocation(EAX));
356
357 // Copy the result back to the expected output.
358 Location out = invoke->GetLocations()->Out();
359 if (out.IsValid()) {
360 DCHECK(out.IsRegister());
361 codegen->MoveFromReturnRegister(out, invoke->GetType());
362 }
363 }
364
365 static void CreateSSE41FPToFPLocations(ArenaAllocator* allocator,
366 HInvoke* invoke,
367 CodeGeneratorX86* codegen) {
368 // Do we have instruction support?
369 if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
370 CreateFPToFPLocations(allocator, invoke);
371 return;
372 }
373
374 // We have to fall back to a call to the intrinsic.
375 LocationSummary* locations =
376 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly);
377 InvokeRuntimeCallingConvention calling_convention;
378 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
379 locations->SetOut(Location::FpuRegisterLocation(XMM0));
380 // Needs to be EAX for the invoke.
381 locations->AddTemp(Location::RegisterLocation(EAX));
382 }
383
384 static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86* codegen,
385 HInvoke* invoke,
386 X86Assembler* assembler,
387 int round_mode) {
388 LocationSummary* locations = invoke->GetLocations();
389 if (locations->WillCall()) {
390 InvokeOutOfLineIntrinsic(codegen, invoke);
391 } else {
392 XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
393 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
394 __ roundsd(out, in, Immediate(round_mode));
395 }
396 }
397
398 void IntrinsicLocationsBuilderX86::VisitMathCeil(HInvoke* invoke) {
399 CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
400 }
401
402 void IntrinsicCodeGeneratorX86::VisitMathCeil(HInvoke* invoke) {
403 GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2);
404 }
405
406 void IntrinsicLocationsBuilderX86::VisitMathFloor(HInvoke* invoke) {
407 CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
408 }
409
410 void IntrinsicCodeGeneratorX86::VisitMathFloor(HInvoke* invoke) {
411 GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1);
412 }
413
414 void IntrinsicLocationsBuilderX86::VisitMathRint(HInvoke* invoke) {
415 CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
416 }
417
418 void IntrinsicCodeGeneratorX86::VisitMathRint(HInvoke* invoke) {
419 GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0);
420 }
421
422 void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) {
423 // Do we have instruction support?
424 if (codegen_->GetInstructionSetFeatures().HasSSE4_1()) {
425 HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
426 DCHECK(static_or_direct != nullptr);
427 LocationSummary* locations =
428 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
429 locations->SetInAt(0, Location::RequiresFpuRegister());
430 if (static_or_direct->HasSpecialInput() &&
431 invoke->InputAt(
432 static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
433 locations->SetInAt(1, Location::RequiresRegister());
434 }
435 locations->SetOut(Location::RequiresRegister());
436 locations->AddTemp(Location::RequiresFpuRegister());
437 locations->AddTemp(Location::RequiresFpuRegister());
438 return;
439 }
440
441 // We have to fall back to a call to the intrinsic.
442 LocationSummary* locations =
443 new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly);
444 InvokeRuntimeCallingConvention calling_convention;
445 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
446 locations->SetOut(Location::RegisterLocation(EAX));
447 // Needs to be EAX for the invoke.
448 locations->AddTemp(Location::RegisterLocation(EAX));
449 }
450
451 void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) {
452 LocationSummary* locations = invoke->GetLocations();
453 if (locations->WillCall()) { // TODO: can we reach this?
454 InvokeOutOfLineIntrinsic(codegen_, invoke);
455 return;
456 }
457
458 XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
459 XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
460 XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
461 Register out = locations->Out().AsRegister<Register>();
462 NearLabel skip_incr, done;
463 X86Assembler* assembler = GetAssembler();
464
465 // Since no direct x86 rounding instruction matches the required semantics,
466 // this intrinsic is implemented as follows:
467 // result = floor(in);
468 // if (in - result >= 0.5f)
469 // result = result + 1.0f;
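// For example, in = 2.5f: floor(2.5f) = 2.0f and 2.5f - 2.0f >= 0.5f, so the result is
// bumped to 3.0f, matching Math.round(2.5f) == 3; for in = 2.4f the increment is skipped.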
470 __ movss(t2, in);
471 __ roundss(t1, in, Immediate(1));
472 __ subss(t2, t1);
473 if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) {
474 // Direct constant area available.
475 HX86ComputeBaseMethodAddress* method_address =
476 invoke->InputAt(1)->AsX86ComputeBaseMethodAddress();
477 Register constant_area = locations->InAt(1).AsRegister<Register>();
478 __ comiss(t2, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(0.5f),
479 method_address,
480 constant_area));
481 __ j(kBelow, &skip_incr);
482 __ addss(t1, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(1.0f),
483 method_address,
484 constant_area));
485 __ Bind(&skip_incr);
486 } else {
487 // No constant area: go through stack.
488 __ pushl(Immediate(bit_cast<int32_t, float>(0.5f)));
489 __ pushl(Immediate(bit_cast<int32_t, float>(1.0f)));
490 __ comiss(t2, Address(ESP, 4));
491 __ j(kBelow, &skip_incr);
492 __ addss(t1, Address(ESP, 0));
493 __ Bind(&skip_incr);
494 __ addl(ESP, Immediate(8));
495 }
496
497 // Final conversion to an integer. Unfortunately this also does not have a
498 // direct x86 instruction, since NaN should map to 0 and large positive
499 // values need to be clipped to the extreme value.
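// I.e., roughly: out = (t1 >= (float) kPrimIntMax) ? kPrimIntMax : (isnan(t1) ? 0 : (int) t1).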
500 __ movl(out, Immediate(kPrimIntMax));
501 __ cvtsi2ss(t2, out);
502 __ comiss(t1, t2);
503 __ j(kAboveEqual, &done); // clipped to max (already in out), does not jump on unordered
504 __ movl(out, Immediate(0)); // does not change flags
505 __ j(kUnordered, &done); // NaN mapped to 0 (just moved in out)
506 __ cvttss2si(out, t1);
507 __ Bind(&done);
508 }
509
510 static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
511 LocationSummary* locations =
512 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
513 InvokeRuntimeCallingConvention calling_convention;
514 locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
515 locations->SetOut(Location::FpuRegisterLocation(XMM0));
516 }
517
518 static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86* codegen, QuickEntrypointEnum entry) {
519 LocationSummary* locations = invoke->GetLocations();
520 DCHECK(locations->WillCall());
521 DCHECK(invoke->IsInvokeStaticOrDirect());
522 X86Assembler* assembler = codegen->GetAssembler();
523
524 // We need some place to pass the parameters.
525 __ subl(ESP, Immediate(16));
526 __ cfi().AdjustCFAOffset(16);
527
528 // Pass the parameters at the bottom of the stack.
529 __ movsd(Address(ESP, 0), XMM0);
530
531 // If we have a second parameter, pass it next.
532 if (invoke->GetNumberOfArguments() == 2) {
533 __ movsd(Address(ESP, 8), XMM1);
534 }
535
536 // Now do the actual call.
537 codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
538
539 // Extract the return value from the FP stack.
540 __ fstpl(Address(ESP, 0));
541 __ movsd(XMM0, Address(ESP, 0));
542
543 // And clean up the stack.
544 __ addl(ESP, Immediate(16));
545 __ cfi().AdjustCFAOffset(-16);
546 }
547
548 static void CreateLowestOneBitLocations(ArenaAllocator* allocator, bool is_long, HInvoke* invoke) {
549 LocationSummary* locations =
550 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
551 if (is_long) {
552 locations->SetInAt(0, Location::RequiresRegister());
553 } else {
554 locations->SetInAt(0, Location::Any());
555 }
556 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
557 }
558
559 static void GenLowestOneBit(X86Assembler* assembler,
560 CodeGeneratorX86* codegen,
561 bool is_long,
562 HInvoke* invoke) {
563 LocationSummary* locations = invoke->GetLocations();
564 Location src = locations->InAt(0);
565 Location out_loc = locations->Out();
566
567 if (invoke->InputAt(0)->IsConstant()) {
568 // Evaluate this at compile time.
569 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
570 if (value == 0) {
571 if (is_long) {
572 __ xorl(out_loc.AsRegisterPairLow<Register>(), out_loc.AsRegisterPairLow<Register>());
573 __ xorl(out_loc.AsRegisterPairHigh<Register>(), out_loc.AsRegisterPairHigh<Register>());
574 } else {
575 __ xorl(out_loc.AsRegister<Register>(), out_loc.AsRegister<Register>());
576 }
577 return;
578 }
579 // Nonzero value.
580 value = is_long ? CTZ(static_cast<uint64_t>(value))
581 : CTZ(static_cast<uint32_t>(value));
582 if (is_long) {
583 if (value >= 32) {
584 int shift = value-32;
585 codegen->Load32BitValue(out_loc.AsRegisterPairLow<Register>(), 0);
586 codegen->Load32BitValue(out_loc.AsRegisterPairHigh<Register>(), 1 << shift);
587 } else {
588 codegen->Load32BitValue(out_loc.AsRegisterPairLow<Register>(), 1 << value);
589 codegen->Load32BitValue(out_loc.AsRegisterPairHigh<Register>(), 0);
590 }
591 } else {
592 codegen->Load32BitValue(out_loc.AsRegister<Register>(), 1 << value);
593 }
594 return;
595 }
596 // Handle non constant case
597 if (is_long) {
598 DCHECK(src.IsRegisterPair());
599 Register src_lo = src.AsRegisterPairLow<Register>();
600 Register src_hi = src.AsRegisterPairHigh<Register>();
601
602 Register out_lo = out_loc.AsRegisterPairLow<Register>();
603 Register out_hi = out_loc.AsRegisterPairHigh<Register>();
604
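// Compute -src across the register pair (negate the low word, propagate the borrow into
// the high word, then negate it), and AND with src so only the lowest set bit survives.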
605 __ movl(out_lo, src_lo);
606 __ movl(out_hi, src_hi);
607
608 __ negl(out_lo);
609 __ adcl(out_hi, Immediate(0));
610 __ negl(out_hi);
611
612 __ andl(out_lo, src_lo);
613 __ andl(out_hi, src_hi);
614 } else {
615 if (codegen->GetInstructionSetFeatures().HasAVX2() && src.IsRegister()) {
616 Register out = out_loc.AsRegister<Register>();
617 __ blsi(out, src.AsRegister<Register>());
618 } else {
619 Register out = out_loc.AsRegister<Register>();
620 // Do tmp & -tmp
621 if (src.IsRegister()) {
622 __ movl(out, src.AsRegister<Register>());
623 } else {
624 DCHECK(src.IsStackSlot());
625 __ movl(out, Address(ESP, src.GetStackIndex()));
626 }
627 __ negl(out);
628
629 if (src.IsRegister()) {
630 __ andl(out, src.AsRegister<Register>());
631 } else {
632 __ andl(out, Address(ESP, src.GetStackIndex()));
633 }
634 }
635 }
636 }
637
638 void IntrinsicLocationsBuilderX86::VisitMathCos(HInvoke* invoke) {
639 CreateFPToFPCallLocations(allocator_, invoke);
640 }
641
642 void IntrinsicCodeGeneratorX86::VisitMathCos(HInvoke* invoke) {
643 GenFPToFPCall(invoke, codegen_, kQuickCos);
644 }
645
646 void IntrinsicLocationsBuilderX86::VisitMathSin(HInvoke* invoke) {
647 CreateFPToFPCallLocations(allocator_, invoke);
648 }
649
650 void IntrinsicCodeGeneratorX86::VisitMathSin(HInvoke* invoke) {
651 GenFPToFPCall(invoke, codegen_, kQuickSin);
652 }
653
654 void IntrinsicLocationsBuilderX86::VisitMathAcos(HInvoke* invoke) {
655 CreateFPToFPCallLocations(allocator_, invoke);
656 }
657
658 void IntrinsicCodeGeneratorX86::VisitMathAcos(HInvoke* invoke) {
659 GenFPToFPCall(invoke, codegen_, kQuickAcos);
660 }
661
662 void IntrinsicLocationsBuilderX86::VisitMathAsin(HInvoke* invoke) {
663 CreateFPToFPCallLocations(allocator_, invoke);
664 }
665
666 void IntrinsicCodeGeneratorX86::VisitMathAsin(HInvoke* invoke) {
667 GenFPToFPCall(invoke, codegen_, kQuickAsin);
668 }
669
670 void IntrinsicLocationsBuilderX86::VisitMathAtan(HInvoke* invoke) {
671 CreateFPToFPCallLocations(allocator_, invoke);
672 }
673
674 void IntrinsicCodeGeneratorX86::VisitMathAtan(HInvoke* invoke) {
675 GenFPToFPCall(invoke, codegen_, kQuickAtan);
676 }
677
678 void IntrinsicLocationsBuilderX86::VisitMathCbrt(HInvoke* invoke) {
679 CreateFPToFPCallLocations(allocator_, invoke);
680 }
681
682 void IntrinsicCodeGeneratorX86::VisitMathCbrt(HInvoke* invoke) {
683 GenFPToFPCall(invoke, codegen_, kQuickCbrt);
684 }
685
686 void IntrinsicLocationsBuilderX86::VisitMathCosh(HInvoke* invoke) {
687 CreateFPToFPCallLocations(allocator_, invoke);
688 }
689
690 void IntrinsicCodeGeneratorX86::VisitMathCosh(HInvoke* invoke) {
691 GenFPToFPCall(invoke, codegen_, kQuickCosh);
692 }
693
694 void IntrinsicLocationsBuilderX86::VisitMathExp(HInvoke* invoke) {
695 CreateFPToFPCallLocations(allocator_, invoke);
696 }
697
698 void IntrinsicCodeGeneratorX86::VisitMathExp(HInvoke* invoke) {
699 GenFPToFPCall(invoke, codegen_, kQuickExp);
700 }
701
702 void IntrinsicLocationsBuilderX86::VisitMathExpm1(HInvoke* invoke) {
703 CreateFPToFPCallLocations(allocator_, invoke);
704 }
705
706 void IntrinsicCodeGeneratorX86::VisitMathExpm1(HInvoke* invoke) {
707 GenFPToFPCall(invoke, codegen_, kQuickExpm1);
708 }
709
710 void IntrinsicLocationsBuilderX86::VisitMathLog(HInvoke* invoke) {
711 CreateFPToFPCallLocations(allocator_, invoke);
712 }
713
714 void IntrinsicCodeGeneratorX86::VisitMathLog(HInvoke* invoke) {
715 GenFPToFPCall(invoke, codegen_, kQuickLog);
716 }
717
718 void IntrinsicLocationsBuilderX86::VisitMathLog10(HInvoke* invoke) {
719 CreateFPToFPCallLocations(allocator_, invoke);
720 }
721
722 void IntrinsicCodeGeneratorX86::VisitMathLog10(HInvoke* invoke) {
723 GenFPToFPCall(invoke, codegen_, kQuickLog10);
724 }
725
726 void IntrinsicLocationsBuilderX86::VisitMathSinh(HInvoke* invoke) {
727 CreateFPToFPCallLocations(allocator_, invoke);
728 }
729
730 void IntrinsicCodeGeneratorX86::VisitMathSinh(HInvoke* invoke) {
731 GenFPToFPCall(invoke, codegen_, kQuickSinh);
732 }
733
734 void IntrinsicLocationsBuilderX86::VisitMathTan(HInvoke* invoke) {
735 CreateFPToFPCallLocations(allocator_, invoke);
736 }
737
738 void IntrinsicCodeGeneratorX86::VisitMathTan(HInvoke* invoke) {
739 GenFPToFPCall(invoke, codegen_, kQuickTan);
740 }
741
742 void IntrinsicLocationsBuilderX86::VisitMathTanh(HInvoke* invoke) {
743 CreateFPToFPCallLocations(allocator_, invoke);
744 }
745
746 void IntrinsicCodeGeneratorX86::VisitMathTanh(HInvoke* invoke) {
747 GenFPToFPCall(invoke, codegen_, kQuickTanh);
748 }
749
750 void IntrinsicLocationsBuilderX86::VisitIntegerLowestOneBit(HInvoke* invoke) {
751 CreateLowestOneBitLocations(allocator_, /*is_long=*/ false, invoke);
752 }
753 void IntrinsicCodeGeneratorX86::VisitIntegerLowestOneBit(HInvoke* invoke) {
754 GenLowestOneBit(GetAssembler(), codegen_, /*is_long=*/ false, invoke);
755 }
756
757 void IntrinsicLocationsBuilderX86::VisitLongLowestOneBit(HInvoke* invoke) {
758 CreateLowestOneBitLocations(allocator_, /*is_long=*/ true, invoke);
759 }
760
761 void IntrinsicCodeGeneratorX86::VisitLongLowestOneBit(HInvoke* invoke) {
762 GenLowestOneBit(GetAssembler(), codegen_, /*is_long=*/ true, invoke);
763 }
764
765 static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
766 LocationSummary* locations =
767 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
768 InvokeRuntimeCallingConvention calling_convention;
769 locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
770 locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
771 locations->SetOut(Location::FpuRegisterLocation(XMM0));
772 }
773
774 void IntrinsicLocationsBuilderX86::VisitMathAtan2(HInvoke* invoke) {
775 CreateFPFPToFPCallLocations(allocator_, invoke);
776 }
777
778 void IntrinsicCodeGeneratorX86::VisitMathAtan2(HInvoke* invoke) {
779 GenFPToFPCall(invoke, codegen_, kQuickAtan2);
780 }
781
782 void IntrinsicLocationsBuilderX86::VisitMathPow(HInvoke* invoke) {
783 CreateFPFPToFPCallLocations(allocator_, invoke);
784 }
785
786 void IntrinsicCodeGeneratorX86::VisitMathPow(HInvoke* invoke) {
787 GenFPToFPCall(invoke, codegen_, kQuickPow);
788 }
789
790 void IntrinsicLocationsBuilderX86::VisitMathHypot(HInvoke* invoke) {
791 CreateFPFPToFPCallLocations(allocator_, invoke);
792 }
793
794 void IntrinsicCodeGeneratorX86::VisitMathHypot(HInvoke* invoke) {
795 GenFPToFPCall(invoke, codegen_, kQuickHypot);
796 }
797
798 void IntrinsicLocationsBuilderX86::VisitMathNextAfter(HInvoke* invoke) {
799 CreateFPFPToFPCallLocations(allocator_, invoke);
800 }
801
802 void IntrinsicCodeGeneratorX86::VisitMathNextAfter(HInvoke* invoke) {
803 GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
804 }
805
806 void IntrinsicLocationsBuilderX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
807 // We need at least two of the positions or length to be an integer constant,
808 // or else we won't have enough free registers.
809 HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
810 HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
811 HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
812
813 int num_constants =
814 ((src_pos != nullptr) ? 1 : 0)
815 + ((dest_pos != nullptr) ? 1 : 0)
816 + ((length != nullptr) ? 1 : 0);
817
818 if (num_constants < 2) {
819 // Not enough free registers.
820 return;
821 }
822
823 // As long as we are checking, we might as well check to see if the src and dest
824 // positions are >= 0.
825 if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
826 (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
827 // We will have to fail anyways.
828 return;
829 }
830
831 // And since we are already checking, check the length too.
832 if (length != nullptr) {
833 int32_t len = length->GetValue();
834 if (len < 0) {
835 // Just call as normal.
836 return;
837 }
838 }
839
840 // Okay, it is safe to generate inline code.
841 LocationSummary* locations =
842 new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
843 // arraycopy(Object src, int srcPos, Object dest, int destPos, int length).
844 locations->SetInAt(0, Location::RequiresRegister());
845 locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
846 locations->SetInAt(2, Location::RequiresRegister());
847 locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
848 locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));
849
850 // And we need some temporaries. We will use REP MOVSW, so we need fixed registers.
851 locations->AddTemp(Location::RegisterLocation(ESI));
852 locations->AddTemp(Location::RegisterLocation(EDI));
853 locations->AddTemp(Location::RegisterLocation(ECX));
854 }
855
856 static void CheckPosition(X86Assembler* assembler,
857 Location pos,
858 Register input,
859 Location length,
860 SlowPathCode* slow_path,
861 Register temp,
862 bool length_is_input_length = false) {
863 // Where is the length in the Array?
864 const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
865
866 if (pos.IsConstant()) {
867 int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
868 if (pos_const == 0) {
869 if (!length_is_input_length) {
870 // Check that length(input) >= length.
871 if (length.IsConstant()) {
872 __ cmpl(Address(input, length_offset),
873 Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
874 } else {
875 __ cmpl(Address(input, length_offset), length.AsRegister<Register>());
876 }
877 __ j(kLess, slow_path->GetEntryLabel());
878 }
879 } else {
880 // Check that length(input) >= pos.
881 __ movl(temp, Address(input, length_offset));
882 __ subl(temp, Immediate(pos_const));
883 __ j(kLess, slow_path->GetEntryLabel());
884
885 // Check that (length(input) - pos) >= length.
886 if (length.IsConstant()) {
887 __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
888 } else {
889 __ cmpl(temp, length.AsRegister<Register>());
890 }
891 __ j(kLess, slow_path->GetEntryLabel());
892 }
893 } else if (length_is_input_length) {
894 // The only way the copy can succeed is if pos is zero.
895 Register pos_reg = pos.AsRegister<Register>();
896 __ testl(pos_reg, pos_reg);
897 __ j(kNotEqual, slow_path->GetEntryLabel());
898 } else {
899 // Check that pos >= 0.
900 Register pos_reg = pos.AsRegister<Register>();
901 __ testl(pos_reg, pos_reg);
902 __ j(kLess, slow_path->GetEntryLabel());
903
904 // Check that pos <= length(input).
905 __ cmpl(Address(input, length_offset), pos_reg);
906 __ j(kLess, slow_path->GetEntryLabel());
907
908 // Check that (length(input) - pos) >= length.
909 __ movl(temp, Address(input, length_offset));
910 __ subl(temp, pos_reg);
911 if (length.IsConstant()) {
912 __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
913 } else {
914 __ cmpl(temp, length.AsRegister<Register>());
915 }
916 __ j(kLess, slow_path->GetEntryLabel());
917 }
918 }
919
920 void IntrinsicCodeGeneratorX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
921 X86Assembler* assembler = GetAssembler();
922 LocationSummary* locations = invoke->GetLocations();
923
924 Register src = locations->InAt(0).AsRegister<Register>();
925 Location srcPos = locations->InAt(1);
926 Register dest = locations->InAt(2).AsRegister<Register>();
927 Location destPos = locations->InAt(3);
928 Location length = locations->InAt(4);
929
930 // Temporaries that we need for MOVSW.
931 Register src_base = locations->GetTemp(0).AsRegister<Register>();
932 DCHECK_EQ(src_base, ESI);
933 Register dest_base = locations->GetTemp(1).AsRegister<Register>();
934 DCHECK_EQ(dest_base, EDI);
935 Register count = locations->GetTemp(2).AsRegister<Register>();
936 DCHECK_EQ(count, ECX);
937
938 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
939 codegen_->AddSlowPath(slow_path);
940
941 // Bail out if the source and destination are the same (to handle overlap).
942 __ cmpl(src, dest);
943 __ j(kEqual, slow_path->GetEntryLabel());
944
945 // Bail out if the source is null.
946 __ testl(src, src);
947 __ j(kEqual, slow_path->GetEntryLabel());
948
949 // Bail out if the destination is null.
950 __ testl(dest, dest);
951 __ j(kEqual, slow_path->GetEntryLabel());
952
953 // If the length is negative, bail out.
954 // We have already checked in the LocationsBuilder for the constant case.
955 if (!length.IsConstant()) {
956 __ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
957 __ j(kLess, slow_path->GetEntryLabel());
958 }
959
960 // We need the count in ECX.
961 if (length.IsConstant()) {
962 __ movl(count, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
963 } else {
964 __ movl(count, length.AsRegister<Register>());
965 }
966
967 // Validity checks: source. Use src_base as a temporary register.
968 CheckPosition(assembler, srcPos, src, Location::RegisterLocation(count), slow_path, src_base);
969
970 // Validity checks: dest. Use src_base as a temporary register.
971 CheckPosition(assembler, destPos, dest, Location::RegisterLocation(count), slow_path, src_base);
972
973 // Okay, everything checks out. Finally time to do the copy.
974 // Check assumption that sizeof(Char) is 2 (used in scaling below).
975 const size_t char_size = DataType::Size(DataType::Type::kUint16);
976 DCHECK_EQ(char_size, 2u);
977
978 const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
979
980 if (srcPos.IsConstant()) {
981 int32_t srcPos_const = srcPos.GetConstant()->AsIntConstant()->GetValue();
982 __ leal(src_base, Address(src, char_size * srcPos_const + data_offset));
983 } else {
984 __ leal(src_base, Address(src, srcPos.AsRegister<Register>(),
985 ScaleFactor::TIMES_2, data_offset));
986 }
987 if (destPos.IsConstant()) {
988 int32_t destPos_const = destPos.GetConstant()->AsIntConstant()->GetValue();
989
990 __ leal(dest_base, Address(dest, char_size * destPos_const + data_offset));
991 } else {
992 __ leal(dest_base, Address(dest, destPos.AsRegister<Register>(),
993 ScaleFactor::TIMES_2, data_offset));
994 }
995
996 // Do the move.
997 __ rep_movsw();
998
999 __ Bind(slow_path->GetExitLabel());
1000 }
1001
1002 void IntrinsicLocationsBuilderX86::VisitStringCompareTo(HInvoke* invoke) {
1003 // The inputs plus one temp.
1004 LocationSummary* locations = new (allocator_) LocationSummary(
1005 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1006 InvokeRuntimeCallingConvention calling_convention;
1007 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1008 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1009 locations->SetOut(Location::RegisterLocation(EAX));
1010 }
1011
1012 void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) {
1013 X86Assembler* assembler = GetAssembler();
1014 LocationSummary* locations = invoke->GetLocations();
1015
1016 // Note that the null check must have been done earlier.
1017 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1018
1019 Register argument = locations->InAt(1).AsRegister<Register>();
1020 __ testl(argument, argument);
1021 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
1022 codegen_->AddSlowPath(slow_path);
1023 __ j(kEqual, slow_path->GetEntryLabel());
1024
1025 codegen_->InvokeRuntime(kQuickStringCompareTo, invoke, invoke->GetDexPc(), slow_path);
1026 __ Bind(slow_path->GetExitLabel());
1027 }
1028
1029 void IntrinsicLocationsBuilderX86::VisitStringEquals(HInvoke* invoke) {
1030 LocationSummary* locations =
1031 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1032 locations->SetInAt(0, Location::RequiresRegister());
1033 locations->SetInAt(1, Location::RequiresRegister());
1034
1035 // Request temporary registers, ECX and EDI needed for repe_cmpsl instruction.
1036 locations->AddTemp(Location::RegisterLocation(ECX));
1037 locations->AddTemp(Location::RegisterLocation(EDI));
1038
1039 // Set output, ESI needed for repe_cmpsl instruction anyways.
1040 locations->SetOut(Location::RegisterLocation(ESI), Location::kOutputOverlap);
1041 }
1042
1043 void IntrinsicCodeGeneratorX86::VisitStringEquals(HInvoke* invoke) {
1044 X86Assembler* assembler = GetAssembler();
1045 LocationSummary* locations = invoke->GetLocations();
1046
1047 Register str = locations->InAt(0).AsRegister<Register>();
1048 Register arg = locations->InAt(1).AsRegister<Register>();
1049 Register ecx = locations->GetTemp(0).AsRegister<Register>();
1050 Register edi = locations->GetTemp(1).AsRegister<Register>();
1051 Register esi = locations->Out().AsRegister<Register>();
1052
1053 NearLabel end, return_true, return_false;
1054
1055 // Get offsets of count, value, and class fields within a string object.
1056 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1057 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1058 const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
1059
1060 // Note that the null check must have been done earlier.
1061 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1062
1063 StringEqualsOptimizations optimizations(invoke);
1064 if (!optimizations.GetArgumentNotNull()) {
1065 // Check if input is null, return false if it is.
1066 __ testl(arg, arg);
1067 __ j(kEqual, &return_false);
1068 }
1069
1070 if (!optimizations.GetArgumentIsString()) {
1071 // Instanceof check for the argument by comparing class fields.
1072 // All string objects must have the same type since String cannot be subclassed.
1073 // Receiver must be a string object, so its class field is equal to all strings' class fields.
1074 // If the argument is a string object, its class field must be equal to receiver's class field.
1075 //
1076 // As the String class is expected to be non-movable, we can read the class
1077 // field from String.equals' arguments without read barriers.
1078 AssertNonMovableStringClass();
1079 // Also, because we use the loaded class references only to compare them, we
1080 // don't need to unpoison them.
1081 // /* HeapReference<Class> */ ecx = str->klass_
1082 __ movl(ecx, Address(str, class_offset));
1083 // if (ecx != /* HeapReference<Class> */ arg->klass_) return false
1084 __ cmpl(ecx, Address(arg, class_offset));
1085 __ j(kNotEqual, &return_false);
1086 }
1087
1088 // Reference equality check, return true if same reference.
1089 __ cmpl(str, arg);
1090 __ j(kEqual, &return_true);
1091
1092 // Load length and compression flag of receiver string.
1093 __ movl(ecx, Address(str, count_offset));
1094 // Check if lengths and compression flags are equal, return false if they're not.
1095 // Two identical strings will always have same compression style since
1096 // compression style is decided on alloc.
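// The count field packs (length << 1) | compression_flag (LSB 1 = uncompressed), so the
// single 32-bit compare below checks length and compression style at once.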
1097 __ cmpl(ecx, Address(arg, count_offset));
1098 __ j(kNotEqual, &return_false);
1099 // Return true if strings are empty. Even with string compression `count == 0` means empty.
1100 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1101 "Expecting 0=compressed, 1=uncompressed");
1102 __ jecxz(&return_true);
1103
1104 if (mirror::kUseStringCompression) {
1105 NearLabel string_uncompressed;
1106 // Extract length and differentiate between both compressed or both uncompressed.
1107 // Different compression style is cut above.
1108 __ shrl(ecx, Immediate(1));
1109 __ j(kCarrySet, &string_uncompressed);
1110 // Divide string length by 2, rounding up, and continue as if uncompressed.
1111 __ addl(ecx, Immediate(1));
1112 __ shrl(ecx, Immediate(1));
1113 __ Bind(&string_uncompressed);
1114 }
1115 // Load starting addresses of string values into ESI/EDI as required for repe_cmpsl instruction.
1116 __ leal(esi, Address(str, value_offset));
1117 __ leal(edi, Address(arg, value_offset));
1118
1119 // Divide string length by 2 to compare characters 2 at a time and adjust for lengths not
1120 // divisible by 2.
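// For example, an uncompressed length of 5 gives (5 + 1) >> 1 = 3 four-byte comparisons;
// reading past the last character is safe per the alignment/padding assertions just below.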
1121 __ addl(ecx, Immediate(1));
1122 __ shrl(ecx, Immediate(1));
1123
1124 // Assertions that must hold in order to compare strings 2 characters (uncompressed)
1125 // or 4 characters (compressed) at a time.
1126 DCHECK_ALIGNED(value_offset, 4);
1127 static_assert(IsAligned<4>(kObjectAlignment), "String of odd length is not zero padded");
1128
1129 // Loop to compare strings two characters at a time starting at the beginning of the string.
1130 __ repe_cmpsl();
1131 // If strings are not equal, zero flag will be cleared.
1132 __ j(kNotEqual, &return_false);
1133
1134 // Return true and exit the function.
1135 // If loop does not result in returning false, we return true.
1136 __ Bind(&return_true);
1137 __ movl(esi, Immediate(1));
1138 __ jmp(&end);
1139
1140 // Return false and exit the function.
1141 __ Bind(&return_false);
1142 __ xorl(esi, esi);
1143 __ Bind(&end);
1144 }
1145
1146 static void CreateStringIndexOfLocations(HInvoke* invoke,
1147 ArenaAllocator* allocator,
1148 bool start_at_zero) {
1149 LocationSummary* locations = new (allocator) LocationSummary(invoke,
1150 LocationSummary::kCallOnSlowPath,
1151 kIntrinsified);
1152 // The data needs to be in EDI for scasw. So request that the string is there, anyways.
1153 locations->SetInAt(0, Location::RegisterLocation(EDI));
1154 // If we look for a constant char, we'll still have to copy it into EAX. So just request the
1155 // allocator to do that, anyways. We can still do the constant check by checking the parameter
1156 // of the instruction explicitly.
1157 // Note: This works as we don't clobber EAX anywhere.
1158 locations->SetInAt(1, Location::RegisterLocation(EAX));
1159 if (!start_at_zero) {
1160 locations->SetInAt(2, Location::RequiresRegister()); // The starting index.
1161 }
1162 // As we clobber EDI during execution anyways, also use it as the output.
1163 locations->SetOut(Location::SameAsFirstInput());
1164
1165 // repne scasw uses ECX as the counter.
1166 locations->AddTemp(Location::RegisterLocation(ECX));
1167 // Need another temporary to be able to compute the result.
1168 locations->AddTemp(Location::RequiresRegister());
1169 if (mirror::kUseStringCompression) {
1170 // Need another temporary to be able to save unflagged string length.
1171 locations->AddTemp(Location::RequiresRegister());
1172 }
1173 }
1174
1175 static void GenerateStringIndexOf(HInvoke* invoke,
1176 X86Assembler* assembler,
1177 CodeGeneratorX86* codegen,
1178 bool start_at_zero) {
1179 LocationSummary* locations = invoke->GetLocations();
1180
1181 // Note that the null check must have been done earlier.
1182 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1183
1184 Register string_obj = locations->InAt(0).AsRegister<Register>();
1185 Register search_value = locations->InAt(1).AsRegister<Register>();
1186 Register counter = locations->GetTemp(0).AsRegister<Register>();
1187 Register string_length = locations->GetTemp(1).AsRegister<Register>();
1188 Register out = locations->Out().AsRegister<Register>();
1189 // Only used when string compression feature is on.
1190 Register string_length_flagged;
1191
1192 // Check our assumptions for registers.
1193 DCHECK_EQ(string_obj, EDI);
1194 DCHECK_EQ(search_value, EAX);
1195 DCHECK_EQ(counter, ECX);
1196 DCHECK_EQ(out, EDI);
1197
1198 // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
1199 // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
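// Code points above 0xFFFF are encoded as surrogate pairs in UTF-16, so a single-unit scan
// cannot match them and the generic runtime implementation has to handle those cases.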
1200 SlowPathCode* slow_path = nullptr;
1201 HInstruction* code_point = invoke->InputAt(1);
1202 if (code_point->IsIntConstant()) {
1203 if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
1204 std::numeric_limits<uint16_t>::max()) {
1205 // Always needs the slow-path. We could directly dispatch to it, but this case should be
1206 // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1207 slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
1208 codegen->AddSlowPath(slow_path);
1209 __ jmp(slow_path->GetEntryLabel());
1210 __ Bind(slow_path->GetExitLabel());
1211 return;
1212 }
1213 } else if (code_point->GetType() != DataType::Type::kUint16) {
1214 __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
1215 slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
1216 codegen->AddSlowPath(slow_path);
1217 __ j(kAbove, slow_path->GetEntryLabel());
1218 }
1219
1220 // From here down, we know that we are looking for a char that fits in 16 bits.
1221 // Location of reference to data array within the String object.
1222 int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1223 // Location of count within the String object.
1224 int32_t count_offset = mirror::String::CountOffset().Int32Value();
1225
1226 // Load the count field of the string containing the length and compression flag.
1227 __ movl(string_length, Address(string_obj, count_offset));
1228
1229 // Do a zero-length check. Even with string compression `count == 0` means empty.
1230 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1231 "Expecting 0=compressed, 1=uncompressed");
1232 // TODO: Support jecxz.
1233 NearLabel not_found_label;
1234 __ testl(string_length, string_length);
1235 __ j(kEqual, &not_found_label);
1236
1237 if (mirror::kUseStringCompression) {
1238 string_length_flagged = locations->GetTemp(2).AsRegister<Register>();
1239 __ movl(string_length_flagged, string_length);
1240 // Extract the length and shift out the least significant bit used as compression flag.
1241 __ shrl(string_length, Immediate(1));
1242 }
1243
1244 if (start_at_zero) {
1245 // Number of chars to scan is the same as the string length.
1246 __ movl(counter, string_length);
1247
1248 // Move to the start of the string.
1249 __ addl(string_obj, Immediate(value_offset));
1250 } else {
1251 Register start_index = locations->InAt(2).AsRegister<Register>();
1252
1253 // Do a start_index check.
1254 __ cmpl(start_index, string_length);
1255 __ j(kGreaterEqual, &not_found_label);
1256
1257 // Ensure we have a start index >= 0;
1258 __ xorl(counter, counter);
1259 __ cmpl(start_index, Immediate(0));
1260 __ cmovl(kGreater, counter, start_index);
1261
1262 if (mirror::kUseStringCompression) {
1263 NearLabel modify_counter, offset_uncompressed_label;
1264 __ testl(string_length_flagged, Immediate(1));
1265 __ j(kNotZero, &offset_uncompressed_label);
1266 // Move to the start of the string: string_obj + value_offset + start_index.
1267 __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_1, value_offset));
1268 __ jmp(&modify_counter);
1269
1270 // Move to the start of the string: string_obj + value_offset + 2 * start_index.
1271 __ Bind(&offset_uncompressed_label);
1272 __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1273
1274 // Now update ecx (the repne scasw work counter). We have string.length - start_index left to
1275 // compare.
1276 __ Bind(&modify_counter);
1277 } else {
1278 __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1279 }
1280 __ negl(counter);
1281 __ leal(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
1282 }
1283
1284 if (mirror::kUseStringCompression) {
1285 NearLabel uncompressed_string_comparison;
1286 NearLabel comparison_done;
1287 __ testl(string_length_flagged, Immediate(1));
1288 __ j(kNotZero, &uncompressed_string_comparison);
1289
1290 // Check if EAX (search_value) is ASCII.
1291 __ cmpl(search_value, Immediate(127));
1292 __ j(kGreater, &not_found_label);
1293 // Comparing byte-per-byte.
1294 __ repne_scasb();
1295 __ jmp(&comparison_done);
1296
1297 // Everything is set up for repne scasw:
1298 // * Comparison address in EDI.
1299 // * Counter in ECX.
1300 __ Bind(&uncompressed_string_comparison);
1301 __ repne_scasw();
1302 __ Bind(&comparison_done);
1303 } else {
1304 __ repne_scasw();
1305 }
1306 // Did we find a match?
1307 __ j(kNotEqual, &not_found_label);
1308
1309 // Yes, we matched. Compute the index of the result.
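// repne scasw leaves ECX at the count remaining after the matching unit was consumed, so
// (string_length - ECX) - 1 is the index of the match within the original string.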
1310 __ subl(string_length, counter);
1311 __ leal(out, Address(string_length, -1));
1312
1313 NearLabel done;
1314 __ jmp(&done);
1315
1316 // Failed to match; return -1.
1317 __ Bind(&not_found_label);
1318 __ movl(out, Immediate(-1));
1319
1320 // And join up at the end.
1321 __ Bind(&done);
1322 if (slow_path != nullptr) {
1323 __ Bind(slow_path->GetExitLabel());
1324 }
1325 }
1326
VisitStringIndexOf(HInvoke * invoke)1327 void IntrinsicLocationsBuilderX86::VisitStringIndexOf(HInvoke* invoke) {
1328 CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ true);
1329 }
1330
VisitStringIndexOf(HInvoke * invoke)1331 void IntrinsicCodeGeneratorX86::VisitStringIndexOf(HInvoke* invoke) {
1332 GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ true);
1333 }
1334
VisitStringIndexOfAfter(HInvoke * invoke)1335 void IntrinsicLocationsBuilderX86::VisitStringIndexOfAfter(HInvoke* invoke) {
1336 CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ false);
1337 }
1338
VisitStringIndexOfAfter(HInvoke * invoke)1339 void IntrinsicCodeGeneratorX86::VisitStringIndexOfAfter(HInvoke* invoke) {
1340 GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false);
1341 }
1342
VisitStringNewStringFromBytes(HInvoke * invoke)1343 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
1344 LocationSummary* locations = new (allocator_) LocationSummary(
1345 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1346 InvokeRuntimeCallingConvention calling_convention;
1347 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1348 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1349 locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1350 locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
1351 locations->SetOut(Location::RegisterLocation(EAX));
1352 }
1353
VisitStringNewStringFromBytes(HInvoke * invoke)1354 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
1355 X86Assembler* assembler = GetAssembler();
1356 LocationSummary* locations = invoke->GetLocations();
1357
1358 Register byte_array = locations->InAt(0).AsRegister<Register>();
1359 __ testl(byte_array, byte_array);
1360 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
1361 codegen_->AddSlowPath(slow_path);
1362 __ j(kEqual, slow_path->GetEntryLabel());
1363
1364 codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc());
1365 CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1366 __ Bind(slow_path->GetExitLabel());
1367 }
1368
VisitStringNewStringFromChars(HInvoke * invoke)1369 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromChars(HInvoke* invoke) {
1370 LocationSummary* locations =
1371 new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1372 InvokeRuntimeCallingConvention calling_convention;
1373 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1374 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1375 locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1376 locations->SetOut(Location::RegisterLocation(EAX));
1377 }
1378
VisitStringNewStringFromChars(HInvoke * invoke)1379 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromChars(HInvoke* invoke) {
1380 // No need to emit code checking whether `locations->InAt(2)` is a null
1381 // pointer, as callers of the native method
1382 //
1383 // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1384 //
1385 // all include a null check on `data` before calling that method.
1386 codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
1387 CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1388 }
1389
VisitStringNewStringFromString(HInvoke * invoke)1390 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromString(HInvoke* invoke) {
1391 LocationSummary* locations = new (allocator_) LocationSummary(
1392 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1393 InvokeRuntimeCallingConvention calling_convention;
1394 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1395 locations->SetOut(Location::RegisterLocation(EAX));
1396 }
1397
VisitStringNewStringFromString(HInvoke * invoke)1398 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromString(HInvoke* invoke) {
1399 X86Assembler* assembler = GetAssembler();
1400 LocationSummary* locations = invoke->GetLocations();
1401
1402 Register string_to_copy = locations->InAt(0).AsRegister<Register>();
1403 __ testl(string_to_copy, string_to_copy);
1404 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
1405 codegen_->AddSlowPath(slow_path);
1406 __ j(kEqual, slow_path->GetEntryLabel());
1407
1408 codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc());
1409 CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1410 __ Bind(slow_path->GetExitLabel());
1411 }
1412
VisitStringGetCharsNoCheck(HInvoke * invoke)1413 void IntrinsicLocationsBuilderX86::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1414 // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1415 LocationSummary* locations =
1416 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1417 locations->SetInAt(0, Location::RequiresRegister());
1418 locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
1419 // Place srcEnd in ECX to save a move below.
1420 locations->SetInAt(2, Location::RegisterLocation(ECX));
1421 locations->SetInAt(3, Location::RequiresRegister());
1422 locations->SetInAt(4, Location::RequiresRegister());
1423
1424 // And we need some temporaries. We will use REP MOVSW, so we need fixed registers.
1425 // We don't have enough registers to also grab ECX, so handle below.
1426 locations->AddTemp(Location::RegisterLocation(ESI));
1427 locations->AddTemp(Location::RegisterLocation(EDI));
1428 }
1429
VisitStringGetCharsNoCheck(HInvoke * invoke)1430 void IntrinsicCodeGeneratorX86::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1431 X86Assembler* assembler = GetAssembler();
1432 LocationSummary* locations = invoke->GetLocations();
1433
1434 size_t char_component_size = DataType::Size(DataType::Type::kUint16);
1435 // Location of data in char array buffer.
1436 const uint32_t data_offset = mirror::Array::DataOffset(char_component_size).Uint32Value();
1437 // Location of char array data in string.
1438 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1439
1440 // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1441 Register obj = locations->InAt(0).AsRegister<Register>();
1442 Location srcBegin = locations->InAt(1);
1443 int srcBegin_value =
1444 srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0;
1445 Register srcEnd = locations->InAt(2).AsRegister<Register>();
1446 Register dst = locations->InAt(3).AsRegister<Register>();
1447 Register dstBegin = locations->InAt(4).AsRegister<Register>();
1448
1449 // Check assumption that sizeof(Char) is 2 (used in scaling below).
1450 const size_t char_size = DataType::Size(DataType::Type::kUint16);
1451 DCHECK_EQ(char_size, 2u);
1452
1453 // Compute the number of chars (words) to move.
1454 // Save ECX, since we don't know if it will be used later.
1455 __ pushl(ECX);
1456 int stack_adjust = kX86WordSize;
1457 __ cfi().AdjustCFAOffset(stack_adjust);
1458 DCHECK_EQ(srcEnd, ECX);
1459 if (srcBegin.IsConstant()) {
1460 __ subl(ECX, Immediate(srcBegin_value));
1461 } else {
1462 DCHECK(srcBegin.IsRegister());
1463 __ subl(ECX, srcBegin.AsRegister<Register>());
1464 }
1465
1466 NearLabel done;
1467 if (mirror::kUseStringCompression) {
1468 // Location of count in string
1469 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1470 const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
1471 DCHECK_EQ(c_char_size, 1u);
1472 __ pushl(EAX);
1473 __ cfi().AdjustCFAOffset(stack_adjust);
1474
1475 NearLabel copy_loop, copy_uncompressed;
1476 __ testl(Address(obj, count_offset), Immediate(1));
1477 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1478 "Expecting 0=compressed, 1=uncompressed");
1479 __ j(kNotZero, &copy_uncompressed);
1480 // Compute the address of the source string by adding the number of chars from
1481 // the source beginning to the value offset of a string.
1482 __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_1, value_offset));
1483
1484 // Start the loop to copy String's value to Array of Char.
1485 __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1486 __ Bind(&copy_loop);
1487 __ jecxz(&done);
1488 // Use EAX temporary (convert byte from ESI to word).
1489 // TODO: Use LODSB/STOSW (not supported by X86Assembler) with AH initialized to 0.
1490 __ movzxb(EAX, Address(ESI, 0));
1491 __ movw(Address(EDI, 0), EAX);
1492 __ leal(EDI, Address(EDI, char_size));
1493 __ leal(ESI, Address(ESI, c_char_size));
1494 // TODO: Add support for LOOP to X86Assembler.
1495 __ subl(ECX, Immediate(1));
1496 __ jmp(&copy_loop);
1497 __ Bind(&copy_uncompressed);
1498 }
1499
1500 // Do the copy for uncompressed string.
1501 // Compute the address of the destination buffer.
1502 __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1503 __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_2, value_offset));
1504 __ rep_movsw();
1505
1506 __ Bind(&done);
1507 if (mirror::kUseStringCompression) {
1508 // Restore EAX.
1509 __ popl(EAX);
1510 __ cfi().AdjustCFAOffset(-stack_adjust);
1511 }
1512 // Restore ECX.
1513 __ popl(ECX);
1514 __ cfi().AdjustCFAOffset(-stack_adjust);
1515 }
1516
GenPeek(LocationSummary * locations,DataType::Type size,X86Assembler * assembler)1517 static void GenPeek(LocationSummary* locations, DataType::Type size, X86Assembler* assembler) {
1518 Register address = locations->InAt(0).AsRegisterPairLow<Register>();
1519 Location out_loc = locations->Out();
1520 // x86 allows unaligned access. We do not have to check the input or use specific instructions
1521 // to avoid a SIGBUS.
1522 switch (size) {
1523 case DataType::Type::kInt8:
1524 __ movsxb(out_loc.AsRegister<Register>(), Address(address, 0));
1525 break;
1526 case DataType::Type::kInt16:
1527 __ movsxw(out_loc.AsRegister<Register>(), Address(address, 0));
1528 break;
1529 case DataType::Type::kInt32:
1530 __ movl(out_loc.AsRegister<Register>(), Address(address, 0));
1531 break;
1532 case DataType::Type::kInt64:
1533 __ movl(out_loc.AsRegisterPairLow<Register>(), Address(address, 0));
1534 __ movl(out_loc.AsRegisterPairHigh<Register>(), Address(address, 4));
1535 break;
1536 default:
1537 LOG(FATAL) << "Type not recognized for peek: " << size;
1538 UNREACHABLE();
1539 }
1540 }
1541
VisitMemoryPeekByte(HInvoke * invoke)1542 void IntrinsicLocationsBuilderX86::VisitMemoryPeekByte(HInvoke* invoke) {
1543 CreateLongToIntLocations(allocator_, invoke);
1544 }
1545
VisitMemoryPeekByte(HInvoke * invoke)1546 void IntrinsicCodeGeneratorX86::VisitMemoryPeekByte(HInvoke* invoke) {
1547 GenPeek(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler());
1548 }
1549
VisitMemoryPeekIntNative(HInvoke * invoke)1550 void IntrinsicLocationsBuilderX86::VisitMemoryPeekIntNative(HInvoke* invoke) {
1551 CreateLongToIntLocations(allocator_, invoke);
1552 }
1553
VisitMemoryPeekIntNative(HInvoke * invoke)1554 void IntrinsicCodeGeneratorX86::VisitMemoryPeekIntNative(HInvoke* invoke) {
1555 GenPeek(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
1556 }
1557
VisitMemoryPeekLongNative(HInvoke * invoke)1558 void IntrinsicLocationsBuilderX86::VisitMemoryPeekLongNative(HInvoke* invoke) {
1559 CreateLongToLongLocations(allocator_, invoke);
1560 }
1561
VisitMemoryPeekLongNative(HInvoke * invoke)1562 void IntrinsicCodeGeneratorX86::VisitMemoryPeekLongNative(HInvoke* invoke) {
1563 GenPeek(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
1564 }
1565
VisitMemoryPeekShortNative(HInvoke * invoke)1566 void IntrinsicLocationsBuilderX86::VisitMemoryPeekShortNative(HInvoke* invoke) {
1567 CreateLongToIntLocations(allocator_, invoke);
1568 }
1569
VisitMemoryPeekShortNative(HInvoke * invoke)1570 void IntrinsicCodeGeneratorX86::VisitMemoryPeekShortNative(HInvoke* invoke) {
1571 GenPeek(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
1572 }
1573
CreateLongIntToVoidLocations(ArenaAllocator * allocator,DataType::Type size,HInvoke * invoke)1574 static void CreateLongIntToVoidLocations(ArenaAllocator* allocator,
1575 DataType::Type size,
1576 HInvoke* invoke) {
1577 LocationSummary* locations =
1578 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1579 locations->SetInAt(0, Location::RequiresRegister());
1580 HInstruction* value = invoke->InputAt(1);
1581 if (size == DataType::Type::kInt8) {
1582 locations->SetInAt(1, Location::ByteRegisterOrConstant(EDX, value));
1583 } else {
1584 locations->SetInAt(1, Location::RegisterOrConstant(value));
1585 }
1586 }
1587
GenPoke(LocationSummary * locations,DataType::Type size,X86Assembler * assembler)1588 static void GenPoke(LocationSummary* locations, DataType::Type size, X86Assembler* assembler) {
1589 Register address = locations->InAt(0).AsRegisterPairLow<Register>();
1590 Location value_loc = locations->InAt(1);
1591 // x86 allows unaligned access. We do not have to check the input or use specific instructions
1592 // to avoid a SIGBUS.
1593 switch (size) {
1594 case DataType::Type::kInt8:
1595 if (value_loc.IsConstant()) {
1596 __ movb(Address(address, 0),
1597 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1598 } else {
1599 __ movb(Address(address, 0), value_loc.AsRegister<ByteRegister>());
1600 }
1601 break;
1602 case DataType::Type::kInt16:
1603 if (value_loc.IsConstant()) {
1604 __ movw(Address(address, 0),
1605 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1606 } else {
1607 __ movw(Address(address, 0), value_loc.AsRegister<Register>());
1608 }
1609 break;
1610 case DataType::Type::kInt32:
1611 if (value_loc.IsConstant()) {
1612 __ movl(Address(address, 0),
1613 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1614 } else {
1615 __ movl(Address(address, 0), value_loc.AsRegister<Register>());
1616 }
1617 break;
1618 case DataType::Type::kInt64:
1619 if (value_loc.IsConstant()) {
1620 int64_t value = value_loc.GetConstant()->AsLongConstant()->GetValue();
1621 __ movl(Address(address, 0), Immediate(Low32Bits(value)));
1622 __ movl(Address(address, 4), Immediate(High32Bits(value)));
1623 } else {
1624 __ movl(Address(address, 0), value_loc.AsRegisterPairLow<Register>());
1625 __ movl(Address(address, 4), value_loc.AsRegisterPairHigh<Register>());
1626 }
1627 break;
1628 default:
1629 LOG(FATAL) << "Type not recognized for poke: " << size;
1630 UNREACHABLE();
1631 }
1632 }
1633
VisitMemoryPokeByte(HInvoke * invoke)1634 void IntrinsicLocationsBuilderX86::VisitMemoryPokeByte(HInvoke* invoke) {
1635 CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt8, invoke);
1636 }
1637
VisitMemoryPokeByte(HInvoke * invoke)1638 void IntrinsicCodeGeneratorX86::VisitMemoryPokeByte(HInvoke* invoke) {
1639 GenPoke(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler());
1640 }
1641
VisitMemoryPokeIntNative(HInvoke * invoke)1642 void IntrinsicLocationsBuilderX86::VisitMemoryPokeIntNative(HInvoke* invoke) {
1643 CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt32, invoke);
1644 }
1645
VisitMemoryPokeIntNative(HInvoke * invoke)1646 void IntrinsicCodeGeneratorX86::VisitMemoryPokeIntNative(HInvoke* invoke) {
1647 GenPoke(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
1648 }
1649
VisitMemoryPokeLongNative(HInvoke * invoke)1650 void IntrinsicLocationsBuilderX86::VisitMemoryPokeLongNative(HInvoke* invoke) {
1651 CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt64, invoke);
1652 }
1653
VisitMemoryPokeLongNative(HInvoke * invoke)1654 void IntrinsicCodeGeneratorX86::VisitMemoryPokeLongNative(HInvoke* invoke) {
1655 GenPoke(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
1656 }
1657
VisitMemoryPokeShortNative(HInvoke * invoke)1658 void IntrinsicLocationsBuilderX86::VisitMemoryPokeShortNative(HInvoke* invoke) {
1659 CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt16, invoke);
1660 }
1661
VisitMemoryPokeShortNative(HInvoke * invoke)1662 void IntrinsicCodeGeneratorX86::VisitMemoryPokeShortNative(HInvoke* invoke) {
1663 GenPoke(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
1664 }
1665
VisitThreadCurrentThread(HInvoke * invoke)1666 void IntrinsicLocationsBuilderX86::VisitThreadCurrentThread(HInvoke* invoke) {
1667 LocationSummary* locations =
1668 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1669 locations->SetOut(Location::RequiresRegister());
1670 }
1671
VisitThreadCurrentThread(HInvoke * invoke)1672 void IntrinsicCodeGeneratorX86::VisitThreadCurrentThread(HInvoke* invoke) {
1673 Register out = invoke->GetLocations()->Out().AsRegister<Register>();
1674 GetAssembler()->fs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86PointerSize>()));
1675 }
1676
GenUnsafeGet(HInvoke * invoke,DataType::Type type,bool is_volatile,CodeGeneratorX86 * codegen)1677 static void GenUnsafeGet(HInvoke* invoke,
1678 DataType::Type type,
1679 bool is_volatile,
1680 CodeGeneratorX86* codegen) {
1681 X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
1682 LocationSummary* locations = invoke->GetLocations();
1683 Location base_loc = locations->InAt(1);
1684 Register base = base_loc.AsRegister<Register>();
1685 Location offset_loc = locations->InAt(2);
1686 Register offset = offset_loc.AsRegisterPairLow<Register>();
1687 Location output_loc = locations->Out();
1688
1689 switch (type) {
1690 case DataType::Type::kInt32: {
1691 Register output = output_loc.AsRegister<Register>();
1692 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1693 break;
1694 }
1695
1696 case DataType::Type::kReference: {
1697 Register output = output_loc.AsRegister<Register>();
1698 if (kEmitCompilerReadBarrier) {
1699 if (kUseBakerReadBarrier) {
1700 Address src(base, offset, ScaleFactor::TIMES_1, 0);
1701 codegen->GenerateReferenceLoadWithBakerReadBarrier(
1702 invoke, output_loc, base, src, /* needs_null_check= */ false);
1703 } else {
1704 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1705 codegen->GenerateReadBarrierSlow(
1706 invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
1707 }
1708 } else {
1709 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1710 __ MaybeUnpoisonHeapReference(output);
1711 }
1712 break;
1713 }
1714
1715 case DataType::Type::kInt64: {
1716 Register output_lo = output_loc.AsRegisterPairLow<Register>();
1717 Register output_hi = output_loc.AsRegisterPairHigh<Register>();
1718 if (is_volatile) {
1719 // Need to use a XMM to read atomically.
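      // Two separate 32-bit loads could observe a torn value if another thread is
      // writing concurrently; going through an XMM register performs a single
      // 8-byte load instead.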
1720 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
1721 __ movsd(temp, Address(base, offset, ScaleFactor::TIMES_1, 0));
1722 __ movd(output_lo, temp);
1723 __ psrlq(temp, Immediate(32));
1724 __ movd(output_hi, temp);
1725 } else {
1726 __ movl(output_lo, Address(base, offset, ScaleFactor::TIMES_1, 0));
1727 __ movl(output_hi, Address(base, offset, ScaleFactor::TIMES_1, 4));
1728 }
1729 }
1730 break;
1731
1732 default:
1733 LOG(FATAL) << "Unsupported op size " << type;
1734 UNREACHABLE();
1735 }
1736 }
1737
CreateIntIntIntToIntLocations(ArenaAllocator * allocator,HInvoke * invoke,DataType::Type type,bool is_volatile)1738 static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator,
1739 HInvoke* invoke,
1740 DataType::Type type,
1741 bool is_volatile) {
1742 bool can_call = kEmitCompilerReadBarrier &&
1743 (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
1744 invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
1745 LocationSummary* locations =
1746 new (allocator) LocationSummary(invoke,
1747 can_call
1748 ? LocationSummary::kCallOnSlowPath
1749 : LocationSummary::kNoCall,
1750 kIntrinsified);
1751 if (can_call && kUseBakerReadBarrier) {
1752 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
1753 }
1754 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
1755 locations->SetInAt(1, Location::RequiresRegister());
1756 locations->SetInAt(2, Location::RequiresRegister());
1757 if (type == DataType::Type::kInt64) {
1758 if (is_volatile) {
1759 // Need to use XMM to read volatile.
1760 locations->AddTemp(Location::RequiresFpuRegister());
1761 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
1762 } else {
1763 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
1764 }
1765 } else {
1766 locations->SetOut(Location::RequiresRegister(),
1767 (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
1768 }
1769 }
1770
VisitUnsafeGet(HInvoke * invoke)1771 void IntrinsicLocationsBuilderX86::VisitUnsafeGet(HInvoke* invoke) {
1772 CreateIntIntIntToIntLocations(
1773 allocator_, invoke, DataType::Type::kInt32, /* is_volatile= */ false);
1774 }
VisitUnsafeGetVolatile(HInvoke * invoke)1775 void IntrinsicLocationsBuilderX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
1776 CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt32, /* is_volatile= */ true);
1777 }
VisitUnsafeGetLong(HInvoke * invoke)1778 void IntrinsicLocationsBuilderX86::VisitUnsafeGetLong(HInvoke* invoke) {
1779 CreateIntIntIntToIntLocations(
1780 allocator_, invoke, DataType::Type::kInt64, /* is_volatile= */ false);
1781 }
VisitUnsafeGetLongVolatile(HInvoke * invoke)1782 void IntrinsicLocationsBuilderX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
1783 CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt64, /* is_volatile= */ true);
1784 }
VisitUnsafeGetObject(HInvoke * invoke)1785 void IntrinsicLocationsBuilderX86::VisitUnsafeGetObject(HInvoke* invoke) {
1786 CreateIntIntIntToIntLocations(
1787 allocator_, invoke, DataType::Type::kReference, /* is_volatile= */ false);
1788 }
VisitUnsafeGetObjectVolatile(HInvoke * invoke)1789 void IntrinsicLocationsBuilderX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1790 CreateIntIntIntToIntLocations(
1791 allocator_, invoke, DataType::Type::kReference, /* is_volatile= */ true);
1792 }
1793
1794
VisitUnsafeGet(HInvoke * invoke)1795 void IntrinsicCodeGeneratorX86::VisitUnsafeGet(HInvoke* invoke) {
1796 GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ false, codegen_);
1797 }
VisitUnsafeGetVolatile(HInvoke * invoke)1798 void IntrinsicCodeGeneratorX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
1799 GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ true, codegen_);
1800 }
VisitUnsafeGetLong(HInvoke * invoke)1801 void IntrinsicCodeGeneratorX86::VisitUnsafeGetLong(HInvoke* invoke) {
1802 GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ false, codegen_);
1803 }
VisitUnsafeGetLongVolatile(HInvoke * invoke)1804 void IntrinsicCodeGeneratorX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
1805 GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ true, codegen_);
1806 }
VisitUnsafeGetObject(HInvoke * invoke)1807 void IntrinsicCodeGeneratorX86::VisitUnsafeGetObject(HInvoke* invoke) {
1808 GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ false, codegen_);
1809 }
VisitUnsafeGetObjectVolatile(HInvoke * invoke)1810 void IntrinsicCodeGeneratorX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1811 GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ true, codegen_);
1812 }
1813
1814
CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator * allocator,DataType::Type type,HInvoke * invoke,bool is_volatile)1815 static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* allocator,
1816 DataType::Type type,
1817 HInvoke* invoke,
1818 bool is_volatile) {
1819 LocationSummary* locations =
1820 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1821 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
1822 locations->SetInAt(1, Location::RequiresRegister());
1823 locations->SetInAt(2, Location::RequiresRegister());
1824 locations->SetInAt(3, Location::RequiresRegister());
1825 if (type == DataType::Type::kReference) {
1826 // Need temp registers for card-marking.
1827 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
1828 // Ensure the value is in a byte register.
1829 locations->AddTemp(Location::RegisterLocation(ECX));
1830 } else if (type == DataType::Type::kInt64 && is_volatile) {
1831 locations->AddTemp(Location::RequiresFpuRegister());
1832 locations->AddTemp(Location::RequiresFpuRegister());
1833 }
1834 }
1835
VisitUnsafePut(HInvoke * invoke)1836 void IntrinsicLocationsBuilderX86::VisitUnsafePut(HInvoke* invoke) {
1837 CreateIntIntIntIntToVoidPlusTempsLocations(
1838 allocator_, DataType::Type::kInt32, invoke, /* is_volatile= */ false);
1839 }
VisitUnsafePutOrdered(HInvoke * invoke)1840 void IntrinsicLocationsBuilderX86::VisitUnsafePutOrdered(HInvoke* invoke) {
1841 CreateIntIntIntIntToVoidPlusTempsLocations(
1842 allocator_, DataType::Type::kInt32, invoke, /* is_volatile= */ false);
1843 }
VisitUnsafePutVolatile(HInvoke * invoke)1844 void IntrinsicLocationsBuilderX86::VisitUnsafePutVolatile(HInvoke* invoke) {
1845 CreateIntIntIntIntToVoidPlusTempsLocations(
1846 allocator_, DataType::Type::kInt32, invoke, /* is_volatile= */ true);
1847 }
VisitUnsafePutObject(HInvoke * invoke)1848 void IntrinsicLocationsBuilderX86::VisitUnsafePutObject(HInvoke* invoke) {
1849 CreateIntIntIntIntToVoidPlusTempsLocations(
1850 allocator_, DataType::Type::kReference, invoke, /* is_volatile= */ false);
1851 }
VisitUnsafePutObjectOrdered(HInvoke * invoke)1852 void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
1853 CreateIntIntIntIntToVoidPlusTempsLocations(
1854 allocator_, DataType::Type::kReference, invoke, /* is_volatile= */ false);
1855 }
VisitUnsafePutObjectVolatile(HInvoke * invoke)1856 void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
1857 CreateIntIntIntIntToVoidPlusTempsLocations(
1858 allocator_, DataType::Type::kReference, invoke, /* is_volatile= */ true);
1859 }
VisitUnsafePutLong(HInvoke * invoke)1860 void IntrinsicLocationsBuilderX86::VisitUnsafePutLong(HInvoke* invoke) {
1861 CreateIntIntIntIntToVoidPlusTempsLocations(
1862 allocator_, DataType::Type::kInt64, invoke, /* is_volatile= */ false);
1863 }
VisitUnsafePutLongOrdered(HInvoke * invoke)1864 void IntrinsicLocationsBuilderX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
1865 CreateIntIntIntIntToVoidPlusTempsLocations(
1866 allocator_, DataType::Type::kInt64, invoke, /* is_volatile= */ false);
1867 }
VisitUnsafePutLongVolatile(HInvoke * invoke)1868 void IntrinsicLocationsBuilderX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
1869 CreateIntIntIntIntToVoidPlusTempsLocations(
1870 allocator_, DataType::Type::kInt64, invoke, /* is_volatile= */ true);
1871 }
1872
1873 // We don't care for ordered: it requires an AnyStore barrier, which is already given by the x86
1874 // memory model.
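// Volatile stores still need a StoreLoad barrier, which is why codegen->MemoryFence()
// is emitted after the store in the volatile case below.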
GenUnsafePut(LocationSummary * locations,DataType::Type type,bool is_volatile,CodeGeneratorX86 * codegen)1875 static void GenUnsafePut(LocationSummary* locations,
1876 DataType::Type type,
1877 bool is_volatile,
1878 CodeGeneratorX86* codegen) {
1879 X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
1880 Register base = locations->InAt(1).AsRegister<Register>();
1881 Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
1882 Location value_loc = locations->InAt(3);
1883
1884 if (type == DataType::Type::kInt64) {
1885 Register value_lo = value_loc.AsRegisterPairLow<Register>();
1886 Register value_hi = value_loc.AsRegisterPairHigh<Register>();
1887 if (is_volatile) {
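      // Pack the two halves into one XMM register and store them with a single
      // 8-byte access so the volatile write cannot be observed half-done.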
1888 XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
1889 XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
1890 __ movd(temp1, value_lo);
1891 __ movd(temp2, value_hi);
1892 __ punpckldq(temp1, temp2);
1893 __ movsd(Address(base, offset, ScaleFactor::TIMES_1, 0), temp1);
1894 } else {
1895 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_lo);
1896 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 4), value_hi);
1897 }
1898 } else if (kPoisonHeapReferences && type == DataType::Type::kReference) {
1899 Register temp = locations->GetTemp(0).AsRegister<Register>();
1900 __ movl(temp, value_loc.AsRegister<Register>());
1901 __ PoisonHeapReference(temp);
1902 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), temp);
1903 } else {
1904 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_loc.AsRegister<Register>());
1905 }
1906
1907 if (is_volatile) {
1908 codegen->MemoryFence();
1909 }
1910
1911 if (type == DataType::Type::kReference) {
1912 bool value_can_be_null = true; // TODO: Worth finding out this information?
1913 codegen->MarkGCCard(locations->GetTemp(0).AsRegister<Register>(),
1914 locations->GetTemp(1).AsRegister<Register>(),
1915 base,
1916 value_loc.AsRegister<Register>(),
1917 value_can_be_null);
1918 }
1919 }
1920
VisitUnsafePut(HInvoke * invoke)1921 void IntrinsicCodeGeneratorX86::VisitUnsafePut(HInvoke* invoke) {
1922 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ false, codegen_);
1923 }
VisitUnsafePutOrdered(HInvoke * invoke)1924 void IntrinsicCodeGeneratorX86::VisitUnsafePutOrdered(HInvoke* invoke) {
1925 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ false, codegen_);
1926 }
VisitUnsafePutVolatile(HInvoke * invoke)1927 void IntrinsicCodeGeneratorX86::VisitUnsafePutVolatile(HInvoke* invoke) {
1928 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ true, codegen_);
1929 }
VisitUnsafePutObject(HInvoke * invoke)1930 void IntrinsicCodeGeneratorX86::VisitUnsafePutObject(HInvoke* invoke) {
1931 GenUnsafePut(
1932 invoke->GetLocations(), DataType::Type::kReference, /* is_volatile= */ false, codegen_);
1933 }
VisitUnsafePutObjectOrdered(HInvoke * invoke)1934 void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
1935 GenUnsafePut(
1936 invoke->GetLocations(), DataType::Type::kReference, /* is_volatile= */ false, codegen_);
1937 }
VisitUnsafePutObjectVolatile(HInvoke * invoke)1938 void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
1939 GenUnsafePut(
1940 invoke->GetLocations(), DataType::Type::kReference, /* is_volatile= */ true, codegen_);
1941 }
VisitUnsafePutLong(HInvoke * invoke)1942 void IntrinsicCodeGeneratorX86::VisitUnsafePutLong(HInvoke* invoke) {
1943 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile= */ false, codegen_);
1944 }
VisitUnsafePutLongOrdered(HInvoke * invoke)1945 void IntrinsicCodeGeneratorX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
1946 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile= */ false, codegen_);
1947 }
VisitUnsafePutLongVolatile(HInvoke * invoke)1948 void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
1949 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile= */ true, codegen_);
1950 }
1951
CreateIntIntIntIntIntToInt(ArenaAllocator * allocator,DataType::Type type,HInvoke * invoke)1952 static void CreateIntIntIntIntIntToInt(ArenaAllocator* allocator,
1953 DataType::Type type,
1954 HInvoke* invoke) {
1955 bool can_call = kEmitCompilerReadBarrier &&
1956 kUseBakerReadBarrier &&
1957 (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
1958 LocationSummary* locations =
1959 new (allocator) LocationSummary(invoke,
1960 can_call
1961 ? LocationSummary::kCallOnSlowPath
1962 : LocationSummary::kNoCall,
1963 kIntrinsified);
1964 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
1965 locations->SetInAt(1, Location::RequiresRegister());
1966 // Offset is a long, but in 32 bit mode, we only need the low word.
1967 // Can we update the invoke here to remove a TypeConvert to Long?
1968 locations->SetInAt(2, Location::RequiresRegister());
1969 // Expected value must be in EAX or EDX:EAX.
1970 // For long, new value must be in ECX:EBX.
1971 if (type == DataType::Type::kInt64) {
1972 locations->SetInAt(3, Location::RegisterPairLocation(EAX, EDX));
1973 locations->SetInAt(4, Location::RegisterPairLocation(EBX, ECX));
1974 } else {
1975 locations->SetInAt(3, Location::RegisterLocation(EAX));
1976 locations->SetInAt(4, Location::RequiresRegister());
1977 }
1978
1979 // Force a byte register for the output.
1980 locations->SetOut(Location::RegisterLocation(EAX));
1981 if (type == DataType::Type::kReference) {
1982 // Need temporary registers for card-marking, and possibly for
1983 // (Baker) read barrier.
1984 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
1985 // Need a byte register for marking.
1986 locations->AddTemp(Location::RegisterLocation(ECX));
1987 }
1988 }
1989
VisitUnsafeCASInt(HInvoke * invoke)1990 void IntrinsicLocationsBuilderX86::VisitUnsafeCASInt(HInvoke* invoke) {
1991 CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kInt32, invoke);
1992 }
1993
VisitUnsafeCASLong(HInvoke * invoke)1994 void IntrinsicLocationsBuilderX86::VisitUnsafeCASLong(HInvoke* invoke) {
1995 CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kInt64, invoke);
1996 }
1997
VisitUnsafeCASObject(HInvoke * invoke)1998 void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) {
1999 // The only read barrier implementation supporting the
2000 // UnsafeCASObject intrinsic is the Baker-style read barriers.
2001 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
2002 return;
2003 }
2004
2005 CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kReference, invoke);
2006 }
2007
GenCAS(DataType::Type type,HInvoke * invoke,CodeGeneratorX86 * codegen)2008 static void GenCAS(DataType::Type type, HInvoke* invoke, CodeGeneratorX86* codegen) {
2009 X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
2010 LocationSummary* locations = invoke->GetLocations();
2011
2012 Register base = locations->InAt(1).AsRegister<Register>();
2013 Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
2014 Location out = locations->Out();
2015 DCHECK_EQ(out.AsRegister<Register>(), EAX);
2016
2017 // The address of the field within the holding object.
2018 Address field_addr(base, offset, ScaleFactor::TIMES_1, 0);
2019
2020 if (type == DataType::Type::kReference) {
2021 // The only read barrier implementation supporting the
2022 // UnsafeCASObject intrinsic is the Baker-style read barriers.
2023 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
2024
2025 Location temp1_loc = locations->GetTemp(0);
2026 Register temp1 = temp1_loc.AsRegister<Register>();
2027 Register temp2 = locations->GetTemp(1).AsRegister<Register>();
2028
2029 Register expected = locations->InAt(3).AsRegister<Register>();
2030 // Ensure `expected` is in EAX (required by the CMPXCHG instruction).
2031 DCHECK_EQ(expected, EAX);
2032 Register value = locations->InAt(4).AsRegister<Register>();
2033
2034 // Mark card for object assuming new value is stored.
2035 bool value_can_be_null = true; // TODO: Worth finding out this information?
2036 codegen->MarkGCCard(temp1, temp2, base, value, value_can_be_null);
2037
2038 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2039 // Need to make sure the reference stored in the field is a to-space
2040 // one before attempting the CAS or the CAS could fail incorrectly.
2041 codegen->GenerateReferenceLoadWithBakerReadBarrier(
2042 invoke,
2043 temp1_loc, // Unused, used only as a "temporary" within the read barrier.
2044 base,
2045 field_addr,
2046 /* needs_null_check= */ false,
2047 /* always_update_field= */ true,
2048 &temp2);
2049 }
2050
2051 bool base_equals_value = (base == value);
2052 if (kPoisonHeapReferences) {
2053 if (base_equals_value) {
2054 // If `base` and `value` are the same register location, move
2055 // `value` to a temporary register. This way, poisoning
2056 // `value` won't invalidate `base`.
2057 value = temp1;
2058 __ movl(value, base);
2059 }
2060
2061 // Check that the register allocator did not assign the location
2062 // of `expected` (EAX) to `value` nor to `base`, so that heap
2063 // poisoning (when enabled) works as intended below.
2064 // - If `value` were equal to `expected`, both references would
2065 // be poisoned twice, meaning they would not be poisoned at
2066 // all, as heap poisoning uses address negation.
2067 // - If `base` were equal to `expected`, poisoning `expected`
2068 // would invalidate `base`.
2069 DCHECK_NE(value, expected);
2070 DCHECK_NE(base, expected);
2071
2072 __ PoisonHeapReference(expected);
2073 __ PoisonHeapReference(value);
2074 }
2075
2076 __ LockCmpxchgl(field_addr, value);
2077
2078 // LOCK CMPXCHG has full barrier semantics, and we don't need
2079 // scheduling barriers at this time.
2080
2081 // Convert ZF into the Boolean result.
2082 __ setb(kZero, out.AsRegister<Register>());
2083 __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
2084
2085 // If heap poisoning is enabled, we need to unpoison the values
2086 // that were poisoned earlier.
2087 if (kPoisonHeapReferences) {
2088 if (base_equals_value) {
2089 // `value` has been moved to a temporary register, no need to
2090 // unpoison it.
2091 } else {
2092 // Ensure `value` is different from `out`, so that unpoisoning
2093 // the former does not invalidate the latter.
2094 DCHECK_NE(value, out.AsRegister<Register>());
2095 __ UnpoisonHeapReference(value);
2096 }
2097 // Do not unpoison the reference contained in register
2098 // `expected`, as it is the same as register `out` (EAX).
2099 }
2100 } else {
2101 if (type == DataType::Type::kInt32) {
2102 // Ensure the expected value is in EAX (required by the CMPXCHG
2103 // instruction).
2104 DCHECK_EQ(locations->InAt(3).AsRegister<Register>(), EAX);
2105 __ LockCmpxchgl(field_addr, locations->InAt(4).AsRegister<Register>());
2106 } else if (type == DataType::Type::kInt64) {
2107 // Ensure the expected value is in EAX:EDX and that the new
2108 // value is in EBX:ECX (required by the CMPXCHG8B instruction).
2109 DCHECK_EQ(locations->InAt(3).AsRegisterPairLow<Register>(), EAX);
2110 DCHECK_EQ(locations->InAt(3).AsRegisterPairHigh<Register>(), EDX);
2111 DCHECK_EQ(locations->InAt(4).AsRegisterPairLow<Register>(), EBX);
2112 DCHECK_EQ(locations->InAt(4).AsRegisterPairHigh<Register>(), ECX);
2113 __ LockCmpxchg8b(field_addr);
2114 } else {
2115 LOG(FATAL) << "Unexpected CAS type " << type;
2116 }
2117
2118 // LOCK CMPXCHG/LOCK CMPXCHG8B have full barrier semantics, and we
2119 // don't need scheduling barriers at this time.
2120
2121 // Convert ZF into the Boolean result.
2122 __ setb(kZero, out.AsRegister<Register>());
2123 __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
2124 }
2125 }
2126
VisitUnsafeCASInt(HInvoke * invoke)2127 void IntrinsicCodeGeneratorX86::VisitUnsafeCASInt(HInvoke* invoke) {
2128 GenCAS(DataType::Type::kInt32, invoke, codegen_);
2129 }
2130
VisitUnsafeCASLong(HInvoke * invoke)2131 void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) {
2132 GenCAS(DataType::Type::kInt64, invoke, codegen_);
2133 }
2134
VisitUnsafeCASObject(HInvoke * invoke)2135 void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) {
2136 // The only read barrier implementation supporting the
2137 // UnsafeCASObject intrinsic is the Baker-style read barriers.
2138 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
2139
2140 GenCAS(DataType::Type::kReference, invoke, codegen_);
2141 }
2142
VisitIntegerReverse(HInvoke * invoke)2143 void IntrinsicLocationsBuilderX86::VisitIntegerReverse(HInvoke* invoke) {
2144 LocationSummary* locations =
2145 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2146 locations->SetInAt(0, Location::RequiresRegister());
2147 locations->SetOut(Location::SameAsFirstInput());
2148 locations->AddTemp(Location::RequiresRegister());
2149 }
2150
SwapBits(Register reg,Register temp,int32_t shift,int32_t mask,X86Assembler * assembler)2151 static void SwapBits(Register reg, Register temp, int32_t shift, int32_t mask,
2152 X86Assembler* assembler) {
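  // Computes reg = ((reg >> shift) & mask) | ((reg & mask) << shift), i.e. swaps
  // the bit groups selected by `mask` with their neighbours `shift` bits higher.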
2153 Immediate imm_shift(shift);
2154 Immediate imm_mask(mask);
2155 __ movl(temp, reg);
2156 __ shrl(reg, imm_shift);
2157 __ andl(temp, imm_mask);
2158 __ andl(reg, imm_mask);
2159 __ shll(temp, imm_shift);
2160 __ orl(reg, temp);
2161 }
2162
VisitIntegerReverse(HInvoke * invoke)2163 void IntrinsicCodeGeneratorX86::VisitIntegerReverse(HInvoke* invoke) {
2164 X86Assembler* assembler = GetAssembler();
2165 LocationSummary* locations = invoke->GetLocations();
2166
2167 Register reg = locations->InAt(0).AsRegister<Register>();
2168 Register temp = locations->GetTemp(0).AsRegister<Register>();
2169
2170 /*
2171 * Use one bswap instruction to reverse byte order first and then use 3 rounds of
2172 * swapping bits to reverse bits in a number x. Using bswap to save instructions
2173 * compared to generic luni implementation which has 5 rounds of swapping bits.
2174 * x = bswap x
2175 * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
2176 * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
2177 * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
2178 */
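  // For example, within each byte the three rounds turn 0b10110001 into 0b10001101.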
2179 __ bswapl(reg);
2180 SwapBits(reg, temp, 1, 0x55555555, assembler);
2181 SwapBits(reg, temp, 2, 0x33333333, assembler);
2182 SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
2183 }
2184
VisitLongReverse(HInvoke * invoke)2185 void IntrinsicLocationsBuilderX86::VisitLongReverse(HInvoke* invoke) {
2186 LocationSummary* locations =
2187 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2188 locations->SetInAt(0, Location::RequiresRegister());
2189 locations->SetOut(Location::SameAsFirstInput());
2190 locations->AddTemp(Location::RequiresRegister());
2191 }
2192
VisitLongReverse(HInvoke * invoke)2193 void IntrinsicCodeGeneratorX86::VisitLongReverse(HInvoke* invoke) {
2194 X86Assembler* assembler = GetAssembler();
2195 LocationSummary* locations = invoke->GetLocations();
2196
2197 Register reg_low = locations->InAt(0).AsRegisterPairLow<Register>();
2198 Register reg_high = locations->InAt(0).AsRegisterPairHigh<Register>();
2199 Register temp = locations->GetTemp(0).AsRegister<Register>();
2200
2201 // We want to swap high/low, then bswap each one, and then do the same
2202 // as a 32 bit reverse.
2203 // Exchange high and low.
2204 __ movl(temp, reg_low);
2205 __ movl(reg_low, reg_high);
2206 __ movl(reg_high, temp);
2207
2208 // bit-reverse low
2209 __ bswapl(reg_low);
2210 SwapBits(reg_low, temp, 1, 0x55555555, assembler);
2211 SwapBits(reg_low, temp, 2, 0x33333333, assembler);
2212 SwapBits(reg_low, temp, 4, 0x0f0f0f0f, assembler);
2213
2214 // bit-reverse high
2215 __ bswapl(reg_high);
2216 SwapBits(reg_high, temp, 1, 0x55555555, assembler);
2217 SwapBits(reg_high, temp, 2, 0x33333333, assembler);
2218 SwapBits(reg_high, temp, 4, 0x0f0f0f0f, assembler);
2219 }
2220
CreateBitCountLocations(ArenaAllocator * allocator,CodeGeneratorX86 * codegen,HInvoke * invoke,bool is_long)2221 static void CreateBitCountLocations(
2222 ArenaAllocator* allocator, CodeGeneratorX86* codegen, HInvoke* invoke, bool is_long) {
2223 if (!codegen->GetInstructionSetFeatures().HasPopCnt()) {
2224 // Do nothing if there is no popcnt support. This results in generating
2225 // a call for the intrinsic rather than direct code.
2226 return;
2227 }
2228 LocationSummary* locations =
2229 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2230 if (is_long) {
2231 locations->AddTemp(Location::RequiresRegister());
2232 }
2233 locations->SetInAt(0, Location::Any());
2234 locations->SetOut(Location::RequiresRegister());
2235 }
2236
GenBitCount(X86Assembler * assembler,CodeGeneratorX86 * codegen,HInvoke * invoke,bool is_long)2237 static void GenBitCount(X86Assembler* assembler,
2238 CodeGeneratorX86* codegen,
2239 HInvoke* invoke, bool is_long) {
2240 LocationSummary* locations = invoke->GetLocations();
2241 Location src = locations->InAt(0);
2242 Register out = locations->Out().AsRegister<Register>();
2243
2244 if (invoke->InputAt(0)->IsConstant()) {
2245 // Evaluate this at compile time.
2246 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2247 int32_t result = is_long
2248 ? POPCOUNT(static_cast<uint64_t>(value))
2249 : POPCOUNT(static_cast<uint32_t>(value));
2250 codegen->Load32BitValue(out, result);
2251 return;
2252 }
2253
2254 // Handle the non-constant cases.
2255 if (!is_long) {
2256 if (src.IsRegister()) {
2257 __ popcntl(out, src.AsRegister<Register>());
2258 } else {
2259 DCHECK(src.IsStackSlot());
2260 __ popcntl(out, Address(ESP, src.GetStackIndex()));
2261 }
2262 } else {
2263 // The 64-bit case needs to worry about two parts.
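    // The population count of a 64-bit value is the sum of the counts of its two
    // 32-bit halves, so count each half and add.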
2264 Register temp = locations->GetTemp(0).AsRegister<Register>();
2265 if (src.IsRegisterPair()) {
2266 __ popcntl(temp, src.AsRegisterPairLow<Register>());
2267 __ popcntl(out, src.AsRegisterPairHigh<Register>());
2268 } else {
2269 DCHECK(src.IsDoubleStackSlot());
2270 __ popcntl(temp, Address(ESP, src.GetStackIndex()));
2271 __ popcntl(out, Address(ESP, src.GetHighStackIndex(kX86WordSize)));
2272 }
2273 __ addl(out, temp);
2274 }
2275 }
2276
VisitIntegerBitCount(HInvoke * invoke)2277 void IntrinsicLocationsBuilderX86::VisitIntegerBitCount(HInvoke* invoke) {
2278 CreateBitCountLocations(allocator_, codegen_, invoke, /* is_long= */ false);
2279 }
2280
VisitIntegerBitCount(HInvoke * invoke)2281 void IntrinsicCodeGeneratorX86::VisitIntegerBitCount(HInvoke* invoke) {
2282 GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ false);
2283 }
2284
VisitLongBitCount(HInvoke * invoke)2285 void IntrinsicLocationsBuilderX86::VisitLongBitCount(HInvoke* invoke) {
2286 CreateBitCountLocations(allocator_, codegen_, invoke, /* is_long= */ true);
2287 }
2288
VisitLongBitCount(HInvoke * invoke)2289 void IntrinsicCodeGeneratorX86::VisitLongBitCount(HInvoke* invoke) {
2290 GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ true);
2291 }
2292
CreateLeadingZeroLocations(ArenaAllocator * allocator,HInvoke * invoke,bool is_long)2293 static void CreateLeadingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_long) {
2294 LocationSummary* locations =
2295 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2296 if (is_long) {
2297 locations->SetInAt(0, Location::RequiresRegister());
2298 } else {
2299 locations->SetInAt(0, Location::Any());
2300 }
2301 locations->SetOut(Location::RequiresRegister());
2302 }
2303
GenLeadingZeros(X86Assembler * assembler,CodeGeneratorX86 * codegen,HInvoke * invoke,bool is_long)2304 static void GenLeadingZeros(X86Assembler* assembler,
2305 CodeGeneratorX86* codegen,
2306 HInvoke* invoke, bool is_long) {
2307 LocationSummary* locations = invoke->GetLocations();
2308 Location src = locations->InAt(0);
2309 Register out = locations->Out().AsRegister<Register>();
2310
2311 if (invoke->InputAt(0)->IsConstant()) {
2312 // Evaluate this at compile time.
2313 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2314 if (value == 0) {
2315 value = is_long ? 64 : 32;
2316 } else {
2317 value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value));
2318 }
2319 codegen->Load32BitValue(out, value);
2320 return;
2321 }
2322
2323 // Handle the non-constant cases.
2324 if (!is_long) {
2325 if (src.IsRegister()) {
2326 __ bsrl(out, src.AsRegister<Register>());
2327 } else {
2328 DCHECK(src.IsStackSlot());
2329 __ bsrl(out, Address(ESP, src.GetStackIndex()));
2330 }
2331
2332 // BSR sets ZF if the input was zero, and the output is undefined.
2333 NearLabel all_zeroes, done;
2334 __ j(kEqual, &all_zeroes);
2335
2336 // Correct the result from BSR to get the final CLZ result.
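    // BSR returns the index of the most significant set bit; CLZ is 31 - index,
    // which equals index ^ 31 for indices in [0, 31].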
2337 __ xorl(out, Immediate(31));
2338 __ jmp(&done);
2339
2340 // Fix the zero case with the expected result.
2341 __ Bind(&all_zeroes);
2342 __ movl(out, Immediate(32));
2343
2344 __ Bind(&done);
2345 return;
2346 }
2347
2348 // 64 bit case needs to worry about both parts of the register.
2349 DCHECK(src.IsRegisterPair());
2350 Register src_lo = src.AsRegisterPairLow<Register>();
2351 Register src_hi = src.AsRegisterPairHigh<Register>();
2352 NearLabel handle_low, done, all_zeroes;
2353
2354 // Is the high word zero?
2355 __ testl(src_hi, src_hi);
2356 __ j(kEqual, &handle_low);
2357
2358 // High word is not zero. We know that the BSR result is defined in this case.
2359 __ bsrl(out, src_hi);
2360
2361 // Correct the result from BSR to get the final CLZ result.
2362 __ xorl(out, Immediate(31));
2363 __ jmp(&done);
2364
2365 // High word was zero. We have to compute the low word count and add 32.
2366 __ Bind(&handle_low);
2367 __ bsrl(out, src_lo);
2368 __ j(kEqual, &all_zeroes);
2369
2370 // We had a valid result. Use an XOR to both correct the result and add 32.
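  // For the low word, CLZ of the 64-bit value is 32 + (31 - index) = 63 - index,
  // which equals index ^ 63 for indices in [0, 31].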
2371 __ xorl(out, Immediate(63));
2372 __ jmp(&done);
2373
2374 // All zero case.
2375 __ Bind(&all_zeroes);
2376 __ movl(out, Immediate(64));
2377
2378 __ Bind(&done);
2379 }
2380
VisitIntegerNumberOfLeadingZeros(HInvoke * invoke)2381 void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
2382 CreateLeadingZeroLocations(allocator_, invoke, /* is_long= */ false);
2383 }
2384
VisitIntegerNumberOfLeadingZeros(HInvoke * invoke)2385 void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
2386 GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false);
2387 }
2388
VisitLongNumberOfLeadingZeros(HInvoke * invoke)2389 void IntrinsicLocationsBuilderX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
2390 CreateLeadingZeroLocations(allocator_, invoke, /* is_long= */ true);
2391 }
2392
VisitLongNumberOfLeadingZeros(HInvoke * invoke)2393 void IntrinsicCodeGeneratorX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
2394 GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true);
2395 }
2396
CreateTrailingZeroLocations(ArenaAllocator * allocator,HInvoke * invoke,bool is_long)2397 static void CreateTrailingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_long) {
2398 LocationSummary* locations =
2399 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2400 if (is_long) {
2401 locations->SetInAt(0, Location::RequiresRegister());
2402 } else {
2403 locations->SetInAt(0, Location::Any());
2404 }
2405 locations->SetOut(Location::RequiresRegister());
2406 }
2407
GenTrailingZeros(X86Assembler * assembler,CodeGeneratorX86 * codegen,HInvoke * invoke,bool is_long)2408 static void GenTrailingZeros(X86Assembler* assembler,
2409 CodeGeneratorX86* codegen,
2410 HInvoke* invoke, bool is_long) {
2411 LocationSummary* locations = invoke->GetLocations();
2412 Location src = locations->InAt(0);
2413 Register out = locations->Out().AsRegister<Register>();
2414
2415 if (invoke->InputAt(0)->IsConstant()) {
2416 // Evaluate this at compile time.
2417 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2418 if (value == 0) {
2419 value = is_long ? 64 : 32;
2420 } else {
2421 value = is_long ? CTZ(static_cast<uint64_t>(value)) : CTZ(static_cast<uint32_t>(value));
2422 }
2423 codegen->Load32BitValue(out, value);
2424 return;
2425 }
2426
2427 // Handle the non-constant cases.
2428 if (!is_long) {
2429 if (src.IsRegister()) {
2430 __ bsfl(out, src.AsRegister<Register>());
2431 } else {
2432 DCHECK(src.IsStackSlot());
2433 __ bsfl(out, Address(ESP, src.GetStackIndex()));
2434 }
2435
2436 // BSF sets ZF if the input was zero, and the output is undefined.
2437 NearLabel done;
2438 __ j(kNotEqual, &done);
2439
2440 // Fix the zero case with the expected result.
2441 __ movl(out, Immediate(32));
2442
2443 __ Bind(&done);
2444 return;
2445 }
2446
2447 // 64 bit case needs to worry about both parts of the register.
2448 DCHECK(src.IsRegisterPair());
2449 Register src_lo = src.AsRegisterPairLow<Register>();
2450 Register src_hi = src.AsRegisterPairHigh<Register>();
2451 NearLabel done, all_zeroes;
2452
2453 // If the low word is zero, then ZF will be set. If not, we have the answer.
2454 __ bsfl(out, src_lo);
2455 __ j(kNotEqual, &done);
2456
2457 // Low word was zero. We have to compute the high word count and add 32.
2458 __ bsfl(out, src_hi);
2459 __ j(kEqual, &all_zeroes);
2460
2461 // We had a valid result. Add 32 to account for the low word being zero.
2462 __ addl(out, Immediate(32));
2463 __ jmp(&done);
2464
2465 // All zero case.
2466 __ Bind(&all_zeroes);
2467 __ movl(out, Immediate(64));
2468
2469 __ Bind(&done);
2470 }
2471
VisitIntegerNumberOfTrailingZeros(HInvoke * invoke)2472 void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
2473 CreateTrailingZeroLocations(allocator_, invoke, /* is_long= */ false);
2474 }
2475
VisitIntegerNumberOfTrailingZeros(HInvoke * invoke)2476 void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
2477 GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false);
2478 }
2479
VisitLongNumberOfTrailingZeros(HInvoke * invoke)2480 void IntrinsicLocationsBuilderX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
2481 CreateTrailingZeroLocations(allocator_, invoke, /* is_long= */ true);
2482 }
2483
VisitLongNumberOfTrailingZeros(HInvoke * invoke)2484 void IntrinsicCodeGeneratorX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
2485 GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true);
2486 }
2487
IsSameInput(HInstruction * instruction,size_t input0,size_t input1)2488 static bool IsSameInput(HInstruction* instruction, size_t input0, size_t input1) {
2489 return instruction->InputAt(input0) == instruction->InputAt(input1);
2490 }
2491
2492 // Compute base address for the System.arraycopy intrinsic in `base`.
GenSystemArrayCopyBaseAddress(X86Assembler * assembler,DataType::Type type,const Register & array,const Location & pos,const Register & base)2493 static void GenSystemArrayCopyBaseAddress(X86Assembler* assembler,
2494 DataType::Type type,
2495 const Register& array,
2496 const Location& pos,
2497 const Register& base) {
2498 // This routine is only used by the SystemArrayCopy intrinsic at the
2499 // moment. Allowing other element types as `type` (e.g. DataType::Type::kUint16)
2500 // would let it implement the SystemArrayCopyChar intrinsic as well.
2501 DCHECK_EQ(type, DataType::Type::kReference);
2502 const int32_t element_size = DataType::Size(type);
2503 const ScaleFactor scale_factor = static_cast<ScaleFactor>(DataType::SizeShift(type));
2504 const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
2505
2506 if (pos.IsConstant()) {
2507 int32_t constant = pos.GetConstant()->AsIntConstant()->GetValue();
2508 __ leal(base, Address(array, element_size * constant + data_offset));
2509 } else {
2510 __ leal(base, Address(array, pos.AsRegister<Register>(), scale_factor, data_offset));
2511 }
2512 }
2513
2514 // Compute end source address for the System.arraycopy intrinsic in `end`.
GenSystemArrayCopyEndAddress(X86Assembler * assembler,DataType::Type type,const Location & copy_length,const Register & base,const Register & end)2515 static void GenSystemArrayCopyEndAddress(X86Assembler* assembler,
2516 DataType::Type type,
2517 const Location& copy_length,
2518 const Register& base,
2519 const Register& end) {
2520 // This routine is only used by the SystemArrayCopy intrinsic at the
2521 // moment. Allowing other element types as `type` (e.g. DataType::Type::kUint16)
2522 // would let it implement the SystemArrayCopyChar intrinsic as well.
2523 DCHECK_EQ(type, DataType::Type::kReference);
2524 const int32_t element_size = DataType::Size(type);
2525 const ScaleFactor scale_factor = static_cast<ScaleFactor>(DataType::SizeShift(type));
2526
2527 if (copy_length.IsConstant()) {
2528 int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue();
2529 __ leal(end, Address(base, element_size * constant));
2530 } else {
2531 __ leal(end, Address(base, copy_length.AsRegister<Register>(), scale_factor, 0));
2532 }
2533 }

void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) {
  // The only read barrier implementation supporting the
  // SystemArrayCopy intrinsic is the Baker-style read barrier.
  if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
    return;
  }

  CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
  if (invoke->GetLocations() != nullptr) {
    // Need a byte register for marking.
    invoke->GetLocations()->SetTempAt(1, Location::RegisterLocation(ECX));

    static constexpr size_t kSrc = 0;
    static constexpr size_t kSrcPos = 1;
    static constexpr size_t kDest = 2;
    static constexpr size_t kDestPos = 3;
    static constexpr size_t kLength = 4;

    if (!invoke->InputAt(kSrcPos)->IsIntConstant() &&
        !invoke->InputAt(kDestPos)->IsIntConstant() &&
        !invoke->InputAt(kLength)->IsIntConstant()) {
      if (!IsSameInput(invoke, kSrcPos, kDestPos) &&
          !IsSameInput(invoke, kSrcPos, kLength) &&
          !IsSameInput(invoke, kDestPos, kLength) &&
          !IsSameInput(invoke, kSrc, kDest)) {
        // Not enough registers, make the length also take a stack slot.
        invoke->GetLocations()->SetInAt(kLength, Location::Any());
      }
    }
  }
}

void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
  // The only read barrier implementation supporting the
  // SystemArrayCopy intrinsic is the Baker-style read barrier.
  DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);

  X86Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();

  Register src = locations->InAt(0).AsRegister<Register>();
  Location src_pos = locations->InAt(1);
  Register dest = locations->InAt(2).AsRegister<Register>();
  Location dest_pos = locations->InAt(3);
  Location length_arg = locations->InAt(4);
  Location length = length_arg;
  Location temp1_loc = locations->GetTemp(0);
  Register temp1 = temp1_loc.AsRegister<Register>();
  Location temp2_loc = locations->GetTemp(1);
  Register temp2 = temp2_loc.AsRegister<Register>();

  SlowPathCode* intrinsic_slow_path =
      new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
  codegen_->AddSlowPath(intrinsic_slow_path);

  NearLabel conditions_on_positions_validated;
  SystemArrayCopyOptimizations optimizations(invoke);

  // If the source and destination are the same, we go to the slow path when a
  // forward copy could overwrite source elements that have not been copied yet,
  // i.e. when dest_pos > src_pos.
  if (src_pos.IsConstant()) {
    int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
    if (dest_pos.IsConstant()) {
      int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
      if (optimizations.GetDestinationIsSource()) {
        // Checked when building locations.
        DCHECK_GE(src_pos_constant, dest_pos_constant);
      } else if (src_pos_constant < dest_pos_constant) {
        __ cmpl(src, dest);
        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
      }
    } else {
      if (!optimizations.GetDestinationIsSource()) {
        __ cmpl(src, dest);
        __ j(kNotEqual, &conditions_on_positions_validated);
      }
      __ cmpl(dest_pos.AsRegister<Register>(), Immediate(src_pos_constant));
      __ j(kGreater, intrinsic_slow_path->GetEntryLabel());
    }
  } else {
    if (!optimizations.GetDestinationIsSource()) {
      __ cmpl(src, dest);
      __ j(kNotEqual, &conditions_on_positions_validated);
    }
    if (dest_pos.IsConstant()) {
      int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
      __ cmpl(src_pos.AsRegister<Register>(), Immediate(dest_pos_constant));
      __ j(kLess, intrinsic_slow_path->GetEntryLabel());
    } else {
      __ cmpl(src_pos.AsRegister<Register>(), dest_pos.AsRegister<Register>());
      __ j(kLess, intrinsic_slow_path->GetEntryLabel());
    }
  }

  __ Bind(&conditions_on_positions_validated);

  if (!optimizations.GetSourceIsNotNull()) {
    // Bail out if the source is null.
    __ testl(src, src);
    __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
  }

  if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
    // Bail out if the destination is null.
    __ testl(dest, dest);
    __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
  }

  Location temp3_loc = locations->GetTemp(2);
  Register temp3 = temp3_loc.AsRegister<Register>();
  if (length.IsStackSlot()) {
    __ movl(temp3, Address(ESP, length.GetStackIndex()));
    length = Location::RegisterLocation(temp3);
  }

  // If the length is negative, bail out.
  // We have already checked in the LocationsBuilder for the constant case.
  if (!length.IsConstant() &&
      !optimizations.GetCountIsSourceLength() &&
      !optimizations.GetCountIsDestinationLength()) {
    __ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
    __ j(kLess, intrinsic_slow_path->GetEntryLabel());
  }
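
  // CheckPosition (defined earlier in this file) performs the remaining bounds
  // checks, verifying that the requested [position, position + length) range
  // fits within the given array and branching to the slow path otherwise.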

  // Validity checks: source.
  CheckPosition(assembler,
                src_pos,
                src,
                length,
                intrinsic_slow_path,
                temp1,
                optimizations.GetCountIsSourceLength());

  // Validity checks: dest.
  CheckPosition(assembler,
                dest_pos,
                dest,
                length,
                intrinsic_slow_path,
                temp1,
                optimizations.GetCountIsDestinationLength());

  if (!optimizations.GetDoesNotNeedTypeCheck()) {
    // Check whether all elements of the source array are assignable to the component
    // type of the destination array. We do two checks: the classes are the same,
    // or the destination is Object[]. If none of these checks succeed, we go to the
    // slow path.

    if (!optimizations.GetSourceIsNonPrimitiveArray()) {
      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
        // /* HeapReference<Class> */ temp1 = src->klass_
        codegen_->GenerateFieldLoadWithBakerReadBarrier(
            invoke, temp1_loc, src, class_offset, /* needs_null_check= */ false);
        // Bail out if the source is not a non primitive array.
        // /* HeapReference<Class> */ temp1 = temp1->component_type_
        codegen_->GenerateFieldLoadWithBakerReadBarrier(
            invoke, temp1_loc, temp1, component_offset, /* needs_null_check= */ false);
        __ testl(temp1, temp1);
        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
        // If heap poisoning is enabled, `temp1` has been unpoisoned
        // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
      } else {
        // /* HeapReference<Class> */ temp1 = src->klass_
        __ movl(temp1, Address(src, class_offset));
        __ MaybeUnpoisonHeapReference(temp1);
        // Bail out if the source is not a non primitive array.
        // /* HeapReference<Class> */ temp1 = temp1->component_type_
        __ movl(temp1, Address(temp1, component_offset));
        __ testl(temp1, temp1);
        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
        __ MaybeUnpoisonHeapReference(temp1);
      }
      __ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot));
      __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
    }

    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
      if (length.Equals(Location::RegisterLocation(temp3))) {
        // When Baker read barriers are enabled, register `temp3`,
        // which in the present case contains the `length` parameter,
        // will be overwritten below. Make the `length` location
        // reference the original stack location; it will be moved
        // back to `temp3` later if necessary.
        DCHECK(length_arg.IsStackSlot());
        length = length_arg;
      }

      // /* HeapReference<Class> */ temp1 = dest->klass_
      codegen_->GenerateFieldLoadWithBakerReadBarrier(
          invoke, temp1_loc, dest, class_offset, /* needs_null_check= */ false);

      if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
        // Bail out if the destination is not a non primitive array.
        //
        // Register `temp1` is not trashed by the read barrier emitted
        // by GenerateFieldLoadWithBakerReadBarrier below, as that
        // method produces a call to a ReadBarrierMarkRegX entry point,
        // which saves all potentially live registers, including
        // temporaries such as `temp1`.
        // /* HeapReference<Class> */ temp2 = temp1->component_type_
        codegen_->GenerateFieldLoadWithBakerReadBarrier(
            invoke, temp2_loc, temp1, component_offset, /* needs_null_check= */ false);
        __ testl(temp2, temp2);
        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
        // If heap poisoning is enabled, `temp2` has been unpoisoned
        // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
        __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
        __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
      }

      // For the same reason given earlier, `temp1` is not trashed by the
      // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
      // /* HeapReference<Class> */ temp2 = src->klass_
      codegen_->GenerateFieldLoadWithBakerReadBarrier(
          invoke, temp2_loc, src, class_offset, /* needs_null_check= */ false);
      // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
      __ cmpl(temp1, temp2);

      if (optimizations.GetDestinationIsTypedObjectArray()) {
        NearLabel do_copy;
        __ j(kEqual, &do_copy);
        // /* HeapReference<Class> */ temp1 = temp1->component_type_
        codegen_->GenerateFieldLoadWithBakerReadBarrier(
            invoke, temp1_loc, temp1, component_offset, /* needs_null_check= */ false);
        // We do not need to emit a read barrier for the following
        // heap reference load, as `temp1` is only used in a
        // comparison with null below, and this reference is not
        // kept afterwards.
        __ cmpl(Address(temp1, super_offset), Immediate(0));
        __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
        __ Bind(&do_copy);
      } else {
        __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
      }
    } else {
      // Non read barrier code.

      // /* HeapReference<Class> */ temp1 = dest->klass_
      __ movl(temp1, Address(dest, class_offset));
      if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
        __ MaybeUnpoisonHeapReference(temp1);
        // Bail out if the destination is not a non primitive array.
        // /* HeapReference<Class> */ temp2 = temp1->component_type_
        __ movl(temp2, Address(temp1, component_offset));
        __ testl(temp2, temp2);
        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
        __ MaybeUnpoisonHeapReference(temp2);
        __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
        __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
        // Re-poison the heap reference to make the compare instruction below
        // compare two poisoned references.
        __ PoisonHeapReference(temp1);
      }

      // Note: if heap poisoning is on, we are comparing two poisoned references here.
      __ cmpl(temp1, Address(src, class_offset));

      if (optimizations.GetDestinationIsTypedObjectArray()) {
        NearLabel do_copy;
        __ j(kEqual, &do_copy);
        __ MaybeUnpoisonHeapReference(temp1);
        // /* HeapReference<Class> */ temp1 = temp1->component_type_
        __ movl(temp1, Address(temp1, component_offset));
        __ MaybeUnpoisonHeapReference(temp1);
        __ cmpl(Address(temp1, super_offset), Immediate(0));
        __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
        __ Bind(&do_copy);
      } else {
        __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
      }
    }
  } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
    DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
    // Bail out if the source is not a non primitive array.
    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
      // /* HeapReference<Class> */ temp1 = src->klass_
      codegen_->GenerateFieldLoadWithBakerReadBarrier(
          invoke, temp1_loc, src, class_offset, /* needs_null_check= */ false);
      // /* HeapReference<Class> */ temp1 = temp1->component_type_
      codegen_->GenerateFieldLoadWithBakerReadBarrier(
          invoke, temp1_loc, temp1, component_offset, /* needs_null_check= */ false);
      __ testl(temp1, temp1);
      __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
      // If heap poisoning is enabled, `temp1` has been unpoisoned
      // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
    } else {
      // /* HeapReference<Class> */ temp1 = src->klass_
      __ movl(temp1, Address(src, class_offset));
      __ MaybeUnpoisonHeapReference(temp1);
      // /* HeapReference<Class> */ temp1 = temp1->component_type_
      __ movl(temp1, Address(temp1, component_offset));
      __ testl(temp1, temp1);
      __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
      __ MaybeUnpoisonHeapReference(temp1);
    }
    __ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot));
    __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
  }

  const DataType::Type type = DataType::Type::kReference;
  const int32_t element_size = DataType::Size(type);

  // Compute the base source address in `temp1`.
  GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);

  if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
    // If it is needed (in the case of the fast-path loop), the base
    // destination address is computed later, as `temp2` is used for
    // intermediate computations.

    // Compute the end source address in `temp3`.
    if (length.IsStackSlot()) {
      // Location `length` is again pointing at a stack slot, as
      // register `temp3` (which was containing the length parameter
      // earlier) has been overwritten; restore it now.
      DCHECK(length.Equals(length_arg));
      __ movl(temp3, Address(ESP, length.GetStackIndex()));
      length = Location::RegisterLocation(temp3);
    }
    GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);

    // SystemArrayCopy implementation for Baker read barriers (see
    // also CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier):
    //
    //   if (src_ptr != end_ptr) {
    //     uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
    //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
    //     bool is_gray = (rb_state == ReadBarrier::GrayState());
    //     if (is_gray) {
    //       // Slow-path copy.
    //       for (size_t i = 0; i != length; ++i) {
    //         dest_array[dest_pos + i] =
    //             MaybePoison(ReadBarrier::Mark(MaybeUnpoison(src_array[src_pos + i])));
    //       }
    //     } else {
    //       // Fast-path copy.
    //       do {
    //         *dest_ptr++ = *src_ptr++;
    //       } while (src_ptr != end_ptr)
    //     }
    //   }

    NearLabel loop, done;

    // Don't enter the copy loop if `length == 0`.
    __ cmpl(temp1, temp3);
    __ j(kEqual, &done);

    // Given the numeric representation, it's enough to check the low bit of the rb_state.
    static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
    static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
    constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
    constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
    constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);

    // if (rb_state == ReadBarrier::GrayState())
    //   goto slow_path;
    // At this point, just do the "if" and make sure that flags are preserved until the branch.
    __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value));

    // Load fence to prevent load-load reordering.
    // Note that this is a no-op, thanks to the x86 memory model.
    codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);

    // Slow path used to copy array when `src` is gray.
    SlowPathCode* read_barrier_slow_path =
        new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathX86(invoke);
    codegen_->AddSlowPath(read_barrier_slow_path);

    // We have done the "if" of the gray bit check above, now branch based on the flags.
    __ j(kNotZero, read_barrier_slow_path->GetEntryLabel());

    // Fast-path copy.
    // Compute the base destination address in `temp2`.
    GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
    // Iterate over the arrays and do a raw copy of the objects. We don't need to
    // poison/unpoison; the push/pop pair below copies each element through the
    // stack, which avoids the need for yet another temporary register (a single
    // x86 `mov` cannot move memory to memory directly).
    __ Bind(&loop);
    __ pushl(Address(temp1, 0));
    __ cfi().AdjustCFAOffset(4);
    __ popl(Address(temp2, 0));
    __ cfi().AdjustCFAOffset(-4);
    __ addl(temp1, Immediate(element_size));
    __ addl(temp2, Immediate(element_size));
    __ cmpl(temp1, temp3);
    __ j(kNotEqual, &loop);

    __ Bind(read_barrier_slow_path->GetExitLabel());
    __ Bind(&done);
  } else {
    // Non read barrier code.
    // Compute the base destination address in `temp2`.
    GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
    // Compute the end source address in `temp3`.
    GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
    // Iterate over the arrays and do a raw copy of the objects. We don't need to
    // poison/unpoison.
    NearLabel loop, done;
    __ cmpl(temp1, temp3);
    __ j(kEqual, &done);
    __ Bind(&loop);
    __ pushl(Address(temp1, 0));
    __ cfi().AdjustCFAOffset(4);
    __ popl(Address(temp2, 0));
    __ cfi().AdjustCFAOffset(-4);
    __ addl(temp1, Immediate(element_size));
    __ addl(temp2, Immediate(element_size));
    __ cmpl(temp1, temp3);
    __ j(kNotEqual, &loop);
    __ Bind(&done);
  }

  // We only need one card marking on the destination array.
  codegen_->MarkGCCard(temp1, temp2, dest, Register(kNoRegister), /* value_can_be_null= */ false);

  __ Bind(intrinsic_slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86::VisitIntegerValueOf(HInvoke* invoke) {
  DCHECK(invoke->IsInvokeStaticOrDirect());
  InvokeRuntimeCallingConvention calling_convention;
  IntrinsicVisitor::ComputeIntegerValueOfLocations(
      invoke,
      codegen_,
      Location::RegisterLocation(EAX),
      Location::RegisterLocation(calling_convention.GetRegisterAt(0)));

  LocationSummary* locations = invoke->GetLocations();
  if (locations != nullptr) {
    HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect();
    if (invoke_static_or_direct->HasSpecialInput() &&
        invoke->InputAt(invoke_static_or_direct->GetSpecialInputIndex())
            ->IsX86ComputeBaseMethodAddress()) {
      locations->SetInAt(invoke_static_or_direct->GetSpecialInputIndex(),
                         Location::RequiresRegister());
    }
  }
}

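// The code below mirrors the semantics of Integer.valueOf(int): values inside
// the [low, low + length) cache range are loaded from the boot-image
// IntegerCache array, while any other value gets a freshly allocated
// java.lang.Integer whose `value` field is then initialized.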
void IntrinsicCodeGeneratorX86::VisitIntegerValueOf(HInvoke* invoke) {
  DCHECK(invoke->IsInvokeStaticOrDirect());
  IntrinsicVisitor::IntegerValueOfInfo info =
      IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions());
  LocationSummary* locations = invoke->GetLocations();
  X86Assembler* assembler = GetAssembler();

  Register out = locations->Out().AsRegister<Register>();
  InvokeRuntimeCallingConvention calling_convention;
  if (invoke->InputAt(0)->IsConstant()) {
    int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
    if (static_cast<uint32_t>(value - info.low) < info.length) {
      // Just embed the j.l.Integer in the code.
      DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference);
      codegen_->LoadBootImageAddress(
          out, info.value_boot_image_reference, invoke->AsInvokeStaticOrDirect());
    } else {
      DCHECK(locations->CanCall());
      // Allocate and initialize a new j.l.Integer.
      // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
      // JIT object table.
      codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(),
                                             info.integer_boot_image_offset);
      __ movl(Address(out, info.value_offset), Immediate(value));
    }
  } else {
    DCHECK(locations->CanCall());
    Register in = locations->InAt(0).AsRegister<Register>();
    // Check bounds of our cache.
    __ leal(out, Address(in, -info.low));
    __ cmpl(out, Immediate(info.length));
    NearLabel allocate, done;
    __ j(kAboveEqual, &allocate);
    // If the value is within the bounds, load the j.l.Integer directly from the array.
    constexpr size_t kElementSize = sizeof(mirror::HeapReference<mirror::Object>);
    static_assert((1u << TIMES_4) == sizeof(mirror::HeapReference<mirror::Object>),
                  "Check heap reference size.");
    if (codegen_->GetCompilerOptions().IsBootImage()) {
      DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
      size_t method_address_index = invoke->AsInvokeStaticOrDirect()->GetSpecialInputIndex();
      HX86ComputeBaseMethodAddress* method_address =
          invoke->InputAt(method_address_index)->AsX86ComputeBaseMethodAddress();
      DCHECK(method_address != nullptr);
      Register method_address_reg =
          invoke->GetLocations()->InAt(method_address_index).AsRegister<Register>();
      __ movl(out, Address(method_address_reg, out, TIMES_4, CodeGeneratorX86::kDummy32BitOffset));
      codegen_->RecordBootImageIntrinsicPatch(method_address, info.array_data_boot_image_reference);
    } else {
      // Note: We're about to clobber the index in `out`, so we need to use `in` and
      // adjust the offset accordingly.
      uint32_t mid_array_boot_image_offset =
          info.array_data_boot_image_reference - info.low * kElementSize;
      codegen_->LoadBootImageAddress(
          out, mid_array_boot_image_offset, invoke->AsInvokeStaticOrDirect());
      DCHECK_NE(out, in);
      __ movl(out, Address(out, in, TIMES_4, 0));
    }
    __ MaybeUnpoisonHeapReference(out);
    __ jmp(&done);
    __ Bind(&allocate);
    // Otherwise allocate and initialize a new j.l.Integer.
    codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(),
                                           info.integer_boot_image_offset);
    __ movl(Address(out, info.value_offset), in);
    __ Bind(&done);
  }
}

void IntrinsicLocationsBuilderX86::VisitThreadInterrupted(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

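// Thread.interrupted() reads the calling thread's interrupted flag and, if it
// was set, clears it. The flag is stored in the Thread object, addressed here
// through the fs segment register; the MemoryFence() below provides the
// ordering expected of the clearing store.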
void IntrinsicCodeGeneratorX86::VisitThreadInterrupted(HInvoke* invoke) {
  X86Assembler* assembler = GetAssembler();
  Register out = invoke->GetLocations()->Out().AsRegister<Register>();
  Address address = Address::Absolute(Thread::InterruptedOffset<kX86PointerSize>().Int32Value());
  NearLabel done;
  __ fs()->movl(out, address);
  __ testl(out, out);
  __ j(kEqual, &done);
  __ fs()->movl(address, Immediate(0));
  codegen_->MemoryFence();
  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86::VisitReachabilityFence(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::Any());
}

void IntrinsicCodeGeneratorX86::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { }

UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble)
UNIMPLEMENTED_INTRINSIC(X86, ReferenceGetReferent)
UNIMPLEMENTED_INTRINSIC(X86, FloatIsInfinite)
UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite)
UNIMPLEMENTED_INTRINSIC(X86, IntegerHighestOneBit)
UNIMPLEMENTED_INTRINSIC(X86, LongHighestOneBit)
UNIMPLEMENTED_INTRINSIC(X86, CRC32Update)
UNIMPLEMENTED_INTRINSIC(X86, CRC32UpdateBytes)
UNIMPLEMENTED_INTRINSIC(X86, CRC32UpdateByteBuffer)
UNIMPLEMENTED_INTRINSIC(X86, FP16ToFloat)
UNIMPLEMENTED_INTRINSIC(X86, FP16ToHalf)
UNIMPLEMENTED_INTRINSIC(X86, FP16Floor)
UNIMPLEMENTED_INTRINSIC(X86, FP16Ceil)
UNIMPLEMENTED_INTRINSIC(X86, FP16Rint)
UNIMPLEMENTED_INTRINSIC(X86, FP16Greater)
UNIMPLEMENTED_INTRINSIC(X86, FP16GreaterEquals)
UNIMPLEMENTED_INTRINSIC(X86, FP16Less)
UNIMPLEMENTED_INTRINSIC(X86, FP16LessEquals)

UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOf);
UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOfAfter);
UNIMPLEMENTED_INTRINSIC(X86, StringBufferAppend);
UNIMPLEMENTED_INTRINSIC(X86, StringBufferLength);
UNIMPLEMENTED_INTRINSIC(X86, StringBufferToString);
UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendObject);
UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendString);
UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendCharSequence);
UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendCharArray);
UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendBoolean);
UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendChar);
UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendInt);
UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendLong);
UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendFloat);
UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendDouble);
UNIMPLEMENTED_INTRINSIC(X86, StringBuilderLength);
UNIMPLEMENTED_INTRINSIC(X86, StringBuilderToString);

// 1.8.
UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddInt)
UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddLong)
UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetInt)
UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetLong)
UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetObject)

UNREACHABLE_INTRINSICS(X86)

#undef __

}  // namespace x86
}  // namespace art