1 /*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "code_generator_x86.h"
18
19 #include "arch/x86/jni_frame_x86.h"
20 #include "art_method-inl.h"
21 #include "class_table.h"
22 #include "code_generator_utils.h"
23 #include "entrypoints/quick/quick_entrypoints.h"
24 #include "entrypoints/quick/quick_entrypoints_enum.h"
25 #include "gc/accounting/card_table.h"
26 #include "gc/space/image_space.h"
27 #include "heap_poisoning.h"
28 #include "interpreter/mterp/nterp.h"
29 #include "intrinsics.h"
30 #include "intrinsics_list.h"
31 #include "intrinsics_utils.h"
32 #include "intrinsics_x86.h"
33 #include "jit/profiling_info.h"
34 #include "linker/linker_patch.h"
35 #include "lock_word.h"
36 #include "mirror/array-inl.h"
37 #include "mirror/class-inl.h"
38 #include "mirror/var_handle.h"
39 #include "optimizing/nodes.h"
40 #include "profiling_info_builder.h"
41 #include "scoped_thread_state_change-inl.h"
42 #include "thread.h"
43 #include "trace.h"
44 #include "utils/assembler.h"
45 #include "utils/stack_checks.h"
46 #include "utils/x86/assembler_x86.h"
47 #include "utils/x86/constants_x86.h"
48 #include "utils/x86/managed_register_x86.h"
49
50 namespace art HIDDEN {
51
52 template<class MirrorType>
53 class GcRoot;
54
55 namespace x86 {
56
57 static constexpr int kCurrentMethodStackOffset = 0;
58 static constexpr Register kMethodRegisterArgument = EAX;
59 static constexpr Register kCoreCalleeSaves[] = { EBP, ESI, EDI };
60
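// Note: C2 is bit 10 of the x87 FPU status word (1 << 10 == 0x400); FPREM sets it while a remainder reduction is still incomplete.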
61 static constexpr int kC2ConditionMask = 0x400;
62
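// Note: Register(8) is one past the last real core register (EAX..EDI are 0..7); it only models the return address slot (see the constructor below).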
63 static constexpr int kFakeReturnRegister = Register(8);
64
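// Canonical quiet-NaN bit patterns for double and float.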
65 static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000);
66 static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000);
67
68 static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
69 InvokeRuntimeCallingConvention calling_convention;
70 RegisterSet caller_saves = RegisterSet::Empty();
71 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
72 // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
73 // that the kPrimNot result register is the same as the first argument register.
74 return caller_saves;
75 }
76
77 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
78 #define __ down_cast<X86Assembler*>(codegen->GetAssembler())-> // NOLINT
79 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, x).Int32Value()
80
81 class NullCheckSlowPathX86 : public SlowPathCode {
82 public:
83 explicit NullCheckSlowPathX86(HNullCheck* instruction) : SlowPathCode(instruction) {}
84
85 void EmitNativeCode(CodeGenerator* codegen) override {
86 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
87 __ Bind(GetEntryLabel());
88 if (instruction_->CanThrowIntoCatchBlock()) {
89 // Live registers will be restored in the catch block if caught.
90 SaveLiveRegisters(codegen, instruction_->GetLocations());
91 }
92 x86_codegen->InvokeRuntime(kQuickThrowNullPointer,
93 instruction_,
94 instruction_->GetDexPc(),
95 this);
96 CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
97 }
98
99 bool IsFatal() const override { return true; }
100
101 const char* GetDescription() const override { return "NullCheckSlowPathX86"; }
102
103 private:
104 DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86);
105 };
106
107 class DivZeroCheckSlowPathX86 : public SlowPathCode {
108 public:
109 explicit DivZeroCheckSlowPathX86(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}
110
111 void EmitNativeCode(CodeGenerator* codegen) override {
112 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
113 __ Bind(GetEntryLabel());
114 x86_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
115 CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
116 }
117
118 bool IsFatal() const override { return true; }
119
120 const char* GetDescription() const override { return "DivZeroCheckSlowPathX86"; }
121
122 private:
123 DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86);
124 };
125
126 class DivRemMinusOneSlowPathX86 : public SlowPathCode {
127 public:
128 DivRemMinusOneSlowPathX86(HInstruction* instruction, Register reg, bool is_div)
129 : SlowPathCode(instruction), reg_(reg), is_div_(is_div) {}
130
131 void EmitNativeCode(CodeGenerator* codegen) override {
132 __ Bind(GetEntryLabel());
133 if (is_div_) {
134 __ negl(reg_);
135 } else {
136 __ movl(reg_, Immediate(0));
137 }
138 __ jmp(GetExitLabel());
139 }
140
141 const char* GetDescription() const override { return "DivRemMinusOneSlowPathX86"; }
142
143 private:
144 Register reg_;
145 bool is_div_;
146 DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86);
147 };
148
149 class BoundsCheckSlowPathX86 : public SlowPathCode {
150 public:
151 explicit BoundsCheckSlowPathX86(HBoundsCheck* instruction) : SlowPathCode(instruction) {}
152
153 void EmitNativeCode(CodeGenerator* codegen) override {
154 LocationSummary* locations = instruction_->GetLocations();
155 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
156 __ Bind(GetEntryLabel());
157 if (instruction_->CanThrowIntoCatchBlock()) {
158 // Live registers will be restored in the catch block if caught.
159 SaveLiveRegisters(codegen, locations);
160 }
161
162 Location index_loc = locations->InAt(0);
163 Location length_loc = locations->InAt(1);
164 InvokeRuntimeCallingConvention calling_convention;
165 Location index_arg = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
166 Location length_arg = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
167
168 // Are we using an array length from memory?
169 if (!length_loc.IsValid()) {
170 DCHECK(instruction_->InputAt(1)->IsArrayLength());
171 HArrayLength* array_length = instruction_->InputAt(1)->AsArrayLength();
172 DCHECK(array_length->IsEmittedAtUseSite());
173 uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length);
174 Location array_loc = array_length->GetLocations()->InAt(0);
175 if (!index_loc.Equals(length_arg)) {
176 // The index is not clobbered by loading the length directly to `length_arg`.
177 __ movl(length_arg.AsRegister<Register>(),
178 Address(array_loc.AsRegister<Register>(), len_offset));
179 x86_codegen->Move32(index_arg, index_loc);
180 } else if (!array_loc.Equals(index_arg)) {
181 // The array reference is not clobbered by the index move.
182 x86_codegen->Move32(index_arg, index_loc);
183 __ movl(length_arg.AsRegister<Register>(),
184 Address(array_loc.AsRegister<Register>(), len_offset));
185 } else {
186 // We do not have a temporary we could use, so swap the registers using the
187 // parallel move resolver and replace the array with the length afterwards.
188 codegen->EmitParallelMoves(
189 index_loc,
190 index_arg,
191 DataType::Type::kInt32,
192 array_loc,
193 length_arg,
194 DataType::Type::kReference);
195 __ movl(length_arg.AsRegister<Register>(),
196 Address(length_arg.AsRegister<Register>(), len_offset));
197 }
198 if (mirror::kUseStringCompression && array_length->IsStringLength()) {
199 __ shrl(length_arg.AsRegister<Register>(), Immediate(1));
200 }
201 } else {
202 // We're moving two locations to locations that could overlap,
203 // so we need a parallel move resolver.
204 codegen->EmitParallelMoves(
205 index_loc,
206 index_arg,
207 DataType::Type::kInt32,
208 length_loc,
209 length_arg,
210 DataType::Type::kInt32);
211 }
212
213 QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
214 ? kQuickThrowStringBounds
215 : kQuickThrowArrayBounds;
216 x86_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
217 CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
218 CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
219 }
220
221 bool IsFatal() const override { return true; }
222
223 const char* GetDescription() const override { return "BoundsCheckSlowPathX86"; }
224
225 private:
226 DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86);
227 };
228
229 class SuspendCheckSlowPathX86 : public SlowPathCode {
230 public:
231 SuspendCheckSlowPathX86(HSuspendCheck* instruction, HBasicBlock* successor)
232 : SlowPathCode(instruction), successor_(successor) {}
233
234 void EmitNativeCode(CodeGenerator* codegen) override {
235 LocationSummary* locations = instruction_->GetLocations();
236 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
237 __ Bind(GetEntryLabel());
238 SaveLiveRegisters(codegen, locations); // Only saves full width XMM for SIMD.
239 x86_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
240 CheckEntrypointTypes<kQuickTestSuspend, void, void>();
241 RestoreLiveRegisters(codegen, locations); // Only restores full width XMM for SIMD.
242 if (successor_ == nullptr) {
243 __ jmp(GetReturnLabel());
244 } else {
245 __ jmp(x86_codegen->GetLabelOf(successor_));
246 }
247 }
248
249 Label* GetReturnLabel() {
250 DCHECK(successor_ == nullptr);
251 return &return_label_;
252 }
253
254 HBasicBlock* GetSuccessor() const {
255 return successor_;
256 }
257
258 const char* GetDescription() const override { return "SuspendCheckSlowPathX86"; }
259
260 private:
261 HBasicBlock* const successor_;
262 Label return_label_;
263
264 DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86);
265 };
266
267 class LoadStringSlowPathX86 : public SlowPathCode {
268 public:
269 explicit LoadStringSlowPathX86(HLoadString* instruction) : SlowPathCode(instruction) {}
270
271 void EmitNativeCode(CodeGenerator* codegen) override {
272 LocationSummary* locations = instruction_->GetLocations();
273 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
274
275 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
276 __ Bind(GetEntryLabel());
277 SaveLiveRegisters(codegen, locations);
278
279 InvokeRuntimeCallingConvention calling_convention;
280 const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
281 __ movl(calling_convention.GetRegisterAt(0), Immediate(string_index.index_));
282 x86_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
283 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
284 x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX));
285 RestoreLiveRegisters(codegen, locations);
286
287 __ jmp(GetExitLabel());
288 }
289
290 const char* GetDescription() const override { return "LoadStringSlowPathX86"; }
291
292 private:
293 DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86);
294 };
295
296 class LoadClassSlowPathX86 : public SlowPathCode {
297 public:
298 LoadClassSlowPathX86(HLoadClass* cls, HInstruction* at)
299 : SlowPathCode(at), cls_(cls) {
300 DCHECK(at->IsLoadClass() || at->IsClinitCheck());
301 DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
302 }
303
304 void EmitNativeCode(CodeGenerator* codegen) override {
305 LocationSummary* locations = instruction_->GetLocations();
306 Location out = locations->Out();
307 const uint32_t dex_pc = instruction_->GetDexPc();
308 bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
309 bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();
310
311 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
312 __ Bind(GetEntryLabel());
313 SaveLiveRegisters(codegen, locations);
314
315 InvokeRuntimeCallingConvention calling_convention;
316 if (must_resolve_type) {
317 DCHECK(IsSameDexFile(cls_->GetDexFile(), x86_codegen->GetGraph()->GetDexFile()) ||
318 x86_codegen->GetCompilerOptions().WithinOatFile(&cls_->GetDexFile()) ||
319 ContainsElement(Runtime::Current()->GetClassLinker()->GetBootClassPath(),
320 &cls_->GetDexFile()));
321 dex::TypeIndex type_index = cls_->GetTypeIndex();
322 __ movl(calling_convention.GetRegisterAt(0), Immediate(type_index.index_));
323 if (cls_->NeedsAccessCheck()) {
324 CheckEntrypointTypes<kQuickResolveTypeAndVerifyAccess, void*, uint32_t>();
325 x86_codegen->InvokeRuntime(kQuickResolveTypeAndVerifyAccess, instruction_, dex_pc, this);
326 } else {
327 CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
328 x86_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
329 }
330 // If we also must_do_clinit, the resolved type is now in the correct register.
331 } else {
332 DCHECK(must_do_clinit);
333 Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
334 x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), source);
335 }
336 if (must_do_clinit) {
337 x86_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
338 CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
339 }
340
341 // Move the class to the desired location.
342 if (out.IsValid()) {
343 DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
344 x86_codegen->Move32(out, Location::RegisterLocation(EAX));
345 }
346 RestoreLiveRegisters(codegen, locations);
347 __ jmp(GetExitLabel());
348 }
349
350 const char* GetDescription() const override { return "LoadClassSlowPathX86"; }
351
352 private:
353 // The class this slow path will load.
354 HLoadClass* const cls_;
355
356 DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86);
357 };
358
359 class TypeCheckSlowPathX86 : public SlowPathCode {
360 public:
361 TypeCheckSlowPathX86(HInstruction* instruction, bool is_fatal)
362 : SlowPathCode(instruction), is_fatal_(is_fatal) {}
363
364 void EmitNativeCode(CodeGenerator* codegen) override {
365 LocationSummary* locations = instruction_->GetLocations();
366 DCHECK(instruction_->IsCheckCast()
367 || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
368
369 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
370 __ Bind(GetEntryLabel());
371
372 if (kPoisonHeapReferences &&
373 instruction_->IsCheckCast() &&
374 instruction_->AsCheckCast()->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) {
375 // First, unpoison the `cls` reference that was poisoned for direct memory comparison.
376 __ UnpoisonHeapReference(locations->InAt(1).AsRegister<Register>());
377 }
378
379 if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
380 SaveLiveRegisters(codegen, locations);
381 }
382
383 // We're moving two locations to locations that could overlap, so we need a parallel
384 // move resolver.
385 InvokeRuntimeCallingConvention calling_convention;
386 x86_codegen->EmitParallelMoves(locations->InAt(0),
387 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
388 DataType::Type::kReference,
389 locations->InAt(1),
390 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
391 DataType::Type::kReference);
392 if (instruction_->IsInstanceOf()) {
393 x86_codegen->InvokeRuntime(kQuickInstanceofNonTrivial,
394 instruction_,
395 instruction_->GetDexPc(),
396 this);
397 CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
398 } else {
399 DCHECK(instruction_->IsCheckCast());
400 x86_codegen->InvokeRuntime(kQuickCheckInstanceOf,
401 instruction_,
402 instruction_->GetDexPc(),
403 this);
404 CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
405 }
406
407 if (!is_fatal_) {
408 if (instruction_->IsInstanceOf()) {
409 x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX));
410 }
411 RestoreLiveRegisters(codegen, locations);
412
413 __ jmp(GetExitLabel());
414 }
415 }
416
417 const char* GetDescription() const override { return "TypeCheckSlowPathX86"; }
418 bool IsFatal() const override { return is_fatal_; }
419
420 private:
421 const bool is_fatal_;
422
423 DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86);
424 };
425
426 class DeoptimizationSlowPathX86 : public SlowPathCode {
427 public:
428 explicit DeoptimizationSlowPathX86(HDeoptimize* instruction)
429 : SlowPathCode(instruction) {}
430
431 void EmitNativeCode(CodeGenerator* codegen) override {
432 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
433 __ Bind(GetEntryLabel());
434 LocationSummary* locations = instruction_->GetLocations();
435 SaveLiveRegisters(codegen, locations);
436 InvokeRuntimeCallingConvention calling_convention;
437 x86_codegen->Load32BitValue(
438 calling_convention.GetRegisterAt(0),
439 static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
440 x86_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
441 CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
442 }
443
444 const char* GetDescription() const override { return "DeoptimizationSlowPathX86"; }
445
446 private:
447 DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86);
448 };
449
450 class ArraySetSlowPathX86 : public SlowPathCode {
451 public:
452 explicit ArraySetSlowPathX86(HInstruction* instruction) : SlowPathCode(instruction) {}
453
454 void EmitNativeCode(CodeGenerator* codegen) override {
455 LocationSummary* locations = instruction_->GetLocations();
456 __ Bind(GetEntryLabel());
457 SaveLiveRegisters(codegen, locations);
458
459 InvokeRuntimeCallingConvention calling_convention;
460 HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
461 parallel_move.AddMove(
462 locations->InAt(0),
463 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
464 DataType::Type::kReference,
465 nullptr);
466 parallel_move.AddMove(
467 locations->InAt(1),
468 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
469 DataType::Type::kInt32,
470 nullptr);
471 parallel_move.AddMove(
472 locations->InAt(2),
473 Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
474 DataType::Type::kReference,
475 nullptr);
476 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
477
478 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
479 x86_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
480 CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
481 RestoreLiveRegisters(codegen, locations);
482 __ jmp(GetExitLabel());
483 }
484
485 const char* GetDescription() const override { return "ArraySetSlowPathX86"; }
486
487 private:
488 DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86);
489 };
490
491 // Slow path marking an object reference `ref` during a read
492 // barrier. The field `obj.field` in the object `obj` holding this
493 // reference does not get updated by this slow path after marking (see
494 // ReadBarrierMarkAndUpdateFieldSlowPathX86 below for that).
495 //
496 // This means that after the execution of this slow path, `ref` will
497 // always be up-to-date, but `obj.field` may not; i.e., after the
498 // flip, `ref` will be a to-space reference, but `obj.field` will
499 // probably still be a from-space reference (unless it gets updated by
500 // another thread, or if another thread installed another object
501 // reference (different from `ref`) in `obj.field`).
502 class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
503 public:
504 ReadBarrierMarkSlowPathX86(HInstruction* instruction,
505 Location ref,
506 bool unpoison_ref_before_marking)
507 : SlowPathCode(instruction),
508 ref_(ref),
509 unpoison_ref_before_marking_(unpoison_ref_before_marking) {
510 }
511
512 const char* GetDescription() const override { return "ReadBarrierMarkSlowPathX86"; }
513
514 void EmitNativeCode(CodeGenerator* codegen) override {
515 DCHECK(codegen->EmitReadBarrier());
516 LocationSummary* locations = instruction_->GetLocations();
517 Register ref_reg = ref_.AsRegister<Register>();
518 DCHECK(locations->CanCall());
519 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
520 DCHECK(instruction_->IsInstanceFieldGet() ||
521 instruction_->IsStaticFieldGet() ||
522 instruction_->IsArrayGet() ||
523 instruction_->IsArraySet() ||
524 instruction_->IsLoadClass() ||
525 instruction_->IsLoadString() ||
526 instruction_->IsInstanceOf() ||
527 instruction_->IsCheckCast() ||
528 (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
529 << "Unexpected instruction in read barrier marking slow path: "
530 << instruction_->DebugName();
531
532 __ Bind(GetEntryLabel());
533 if (unpoison_ref_before_marking_) {
534 // Object* ref = ref_addr->AsMirrorPtr()
535 __ MaybeUnpoisonHeapReference(ref_reg);
536 }
537 // No need to save live registers; it's taken care of by the
538 // entrypoint. Also, there is no need to update the stack mask,
539 // as this runtime call will not trigger a garbage collection.
540 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
541 DCHECK_NE(ref_reg, ESP);
542 DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
543 // "Compact" slow path, saving two moves.
544 //
545 // Instead of using the standard runtime calling convention (input
546 // and output in EAX):
547 //
548 // EAX <- ref
549 // EAX <- ReadBarrierMark(EAX)
550 // ref <- EAX
551 //
552 // we just use rX (the register containing `ref`) as input and output
553 // of a dedicated entrypoint:
554 //
555 // rX <- ReadBarrierMarkRegX(rX)
556 //
557 int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg);
558 // This runtime call does not require a stack map.
559 x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
560 __ jmp(GetExitLabel());
561 }
562
563 private:
564 // The location (register) of the marked object reference.
565 const Location ref_;
566 // Should the reference in `ref_` be unpoisoned prior to marking it?
567 const bool unpoison_ref_before_marking_;
568
569 DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86);
570 };
571
572 // Slow path marking an object reference `ref` during a read barrier,
573 // and if needed, atomically updating the field `obj.field` in the
574 // object `obj` holding this reference after marking (contrary to
575 // ReadBarrierMarkSlowPathX86 above, which never tries to update
576 // `obj.field`).
577 //
578 // This means that after the execution of this slow path, both `ref`
579 // and `obj.field` will be up-to-date; i.e., after the flip, both will
580 // hold the same to-space reference (unless another thread installed
581 // another object reference (different from `ref`) in `obj.field`).
582 class ReadBarrierMarkAndUpdateFieldSlowPathX86 : public SlowPathCode {
583 public:
584 ReadBarrierMarkAndUpdateFieldSlowPathX86(HInstruction* instruction,
585 Location ref,
586 Register obj,
587 const Address& field_addr,
588 bool unpoison_ref_before_marking,
589 Register temp)
590 : SlowPathCode(instruction),
591 ref_(ref),
592 obj_(obj),
593 field_addr_(field_addr),
594 unpoison_ref_before_marking_(unpoison_ref_before_marking),
595 temp_(temp) {
596 }
597
598 const char* GetDescription() const override { return "ReadBarrierMarkAndUpdateFieldSlowPathX86"; }
599
600 void EmitNativeCode(CodeGenerator* codegen) override {
601 DCHECK(codegen->EmitReadBarrier());
602 LocationSummary* locations = instruction_->GetLocations();
603 Register ref_reg = ref_.AsRegister<Register>();
604 DCHECK(locations->CanCall());
605 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
606 DCHECK((instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
607 << "Unexpected instruction in read barrier marking and field updating slow path: "
608 << instruction_->DebugName();
609 HInvoke* invoke = instruction_->AsInvoke();
610 DCHECK(IsUnsafeCASReference(invoke) ||
611 IsUnsafeGetAndSetReference(invoke) ||
612 IsVarHandleCASFamily(invoke)) << invoke->GetIntrinsic();
613
614 __ Bind(GetEntryLabel());
615 if (unpoison_ref_before_marking_) {
616 // Object* ref = ref_addr->AsMirrorPtr()
617 __ MaybeUnpoisonHeapReference(ref_reg);
618 }
619
620 // Save the old (unpoisoned) reference.
621 __ movl(temp_, ref_reg);
622
623 // No need to save live registers; it's taken care of by the
624 // entrypoint. Also, there is no need to update the stack mask,
625 // as this runtime call will not trigger a garbage collection.
626 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
627 DCHECK_NE(ref_reg, ESP);
628 DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
629 // "Compact" slow path, saving two moves.
630 //
631 // Instead of using the standard runtime calling convention (input
632 // and output in EAX):
633 //
634 // EAX <- ref
635 // EAX <- ReadBarrierMark(EAX)
636 // ref <- EAX
637 //
638 // we just use rX (the register containing `ref`) as input and output
639 // of a dedicated entrypoint:
640 //
641 // rX <- ReadBarrierMarkRegX(rX)
642 //
643 int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg);
644 // This runtime call does not require a stack map.
645 x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
646
647 // If the new reference is different from the old reference,
648 // update the field in the holder (`*field_addr`).
649 //
650 // Note that this field could also hold a different object, if
651 // another thread had concurrently changed it. In that case, the
652 // LOCK CMPXCHGL instruction in the compare-and-set (CAS)
653 // operation below would abort the CAS, leaving the field as-is.
654 NearLabel done;
655 __ cmpl(temp_, ref_reg);
656 __ j(kEqual, &done);
657
658 // Update the holder's field atomically. This may fail if a mutator
659 // thread updates the field before us, but that is OK. This is achieved
660 // using a strong compare-and-set (CAS) operation with relaxed
661 // memory synchronization ordering, where the expected value is
662 // the old reference and the desired value is the new reference.
663 // This operation is implemented with a 32-bit LOCK CMPXCHG
664 // instruction, which requires the expected value (the old
665 // reference) to be in EAX. Save EAX beforehand, and move the
666 // expected value (stored in `temp_`) into EAX.
667 __ pushl(EAX);
668 __ movl(EAX, temp_);
669
670 // Convenience aliases.
671 Register base = obj_;
672 Register expected = EAX;
673 Register value = ref_reg;
674
675 bool base_equals_value = (base == value);
676 if (kPoisonHeapReferences) {
677 if (base_equals_value) {
678 // If `base` and `value` are the same register location, move
679 // `value` to a temporary register. This way, poisoning
680 // `value` won't invalidate `base`.
681 value = temp_;
682 __ movl(value, base);
683 }
684
685 // Check that the register allocator did not assign the location
686 // of `expected` (EAX) to `value` nor to `base`, so that heap
687 // poisoning (when enabled) works as intended below.
688 // - If `value` were equal to `expected`, both references would
689 // be poisoned twice, meaning they would not be poisoned at
690 // all, as heap poisoning uses address negation.
691 // - If `base` were equal to `expected`, poisoning `expected`
692 // would invalidate `base`.
693 DCHECK_NE(value, expected);
694 DCHECK_NE(base, expected);
695
696 __ PoisonHeapReference(expected);
697 __ PoisonHeapReference(value);
698 }
699
700 __ LockCmpxchgl(field_addr_, value);
701
702 // If heap poisoning is enabled, we need to unpoison the values
703 // that were poisoned earlier.
704 if (kPoisonHeapReferences) {
705 if (base_equals_value) {
706 // `value` has been moved to a temporary register, no need
707 // to unpoison it.
708 } else {
709 __ UnpoisonHeapReference(value);
710 }
711 // No need to unpoison `expected` (EAX), as it will be overwritten below.
712 }
713
714 // Restore EAX.
715 __ popl(EAX);
716
717 __ Bind(&done);
718 __ jmp(GetExitLabel());
719 }
720
721 private:
722 // The location (register) of the marked object reference.
723 const Location ref_;
724 // The register containing the object holding the marked object reference field.
725 const Register obj_;
726 // The address of the marked reference field. The base of this address must be `obj_`.
727 const Address field_addr_;
728
729 // Should the reference in `ref_` be unpoisoned prior to marking it?
730 const bool unpoison_ref_before_marking_;
731
732 const Register temp_;
733
734 DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86);
735 };
736
737 // Slow path generating a read barrier for a heap reference.
738 class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode {
739 public:
740 ReadBarrierForHeapReferenceSlowPathX86(HInstruction* instruction,
741 Location out,
742 Location ref,
743 Location obj,
744 uint32_t offset,
745 Location index)
746 : SlowPathCode(instruction),
747 out_(out),
748 ref_(ref),
749 obj_(obj),
750 offset_(offset),
751 index_(index) {
752 // If `obj` is equal to `out` or `ref`, it means the initial object
753 // has been overwritten by (or after) the heap object reference load
754 // to be instrumented, e.g.:
755 //
756 // __ movl(out, Address(out, offset));
757 // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
758 //
759 // In that case, we have lost the information about the original
760 // object, and the emitted read barrier cannot work properly.
761 DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
762 DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
763 }
764
765 void EmitNativeCode(CodeGenerator* codegen) override {
766 DCHECK(codegen->EmitReadBarrier());
767 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
768 LocationSummary* locations = instruction_->GetLocations();
769 Register reg_out = out_.AsRegister<Register>();
770 DCHECK(locations->CanCall());
771 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
772 DCHECK(instruction_->IsInstanceFieldGet() ||
773 instruction_->IsStaticFieldGet() ||
774 instruction_->IsArrayGet() ||
775 instruction_->IsInstanceOf() ||
776 instruction_->IsCheckCast() ||
777 (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
778 << "Unexpected instruction in read barrier for heap reference slow path: "
779 << instruction_->DebugName();
780
781 __ Bind(GetEntryLabel());
782 SaveLiveRegisters(codegen, locations);
783
784 // We may have to change the index's value, but as `index_` is a
785 // constant member (like other "inputs" of this slow path),
786 // introduce a copy of it, `index`.
787 Location index = index_;
788 if (index_.IsValid()) {
789 // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
790 if (instruction_->IsArrayGet()) {
791 // Compute the actual memory offset and store it in `index`.
792 Register index_reg = index_.AsRegister<Register>();
793 DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
794 if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
795 // We are about to change the value of `index_reg` (see the
796 // calls to art::x86::X86Assembler::shll and
797 // art::x86::X86Assembler::AddImmediate below), but it has
798 // not been saved by the previous call to
799 // art::SlowPathCode::SaveLiveRegisters, as it is a
800 // callee-save register --
801 // art::SlowPathCode::SaveLiveRegisters does not consider
802 // callee-save registers, as it has been designed with the
803 // assumption that callee-save registers are supposed to be
804 // handled by the called function. So, as a callee-save
805 // register, `index_reg` _would_ eventually be saved onto
806 // the stack, but it would be too late: we would have
807 // changed its value earlier. Therefore, we manually save
808 // it here into another freely available register,
809 // `free_reg`, chosen of course among the caller-save
810 // registers (as a callee-save `free_reg` register would
811 // exhibit the same problem).
812 //
813 // Note we could have requested a temporary register from
814 // the register allocator instead; but we prefer not to, as
815 // this is a slow path, and we know we can find a
816 // caller-save register that is available.
817 Register free_reg = FindAvailableCallerSaveRegister(codegen);
818 __ movl(free_reg, index_reg);
819 index_reg = free_reg;
820 index = Location::RegisterLocation(index_reg);
821 } else {
822 // The initial register stored in `index_` has already been
823 // saved in the call to art::SlowPathCode::SaveLiveRegisters
824 // (as it is not a callee-save register), so we can freely
825 // use it.
826 }
827 // Shifting the index value contained in `index_reg` by the scale
828 // factor (2) cannot overflow in practice, as the runtime is
829 // unable to allocate object arrays with a size larger than
830 // 2^26 - 1 (that is, 2^28 - 4 bytes).
831 __ shll(index_reg, Immediate(TIMES_4));
832 static_assert(
833 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
834 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
835 __ AddImmediate(index_reg, Immediate(offset_));
836 } else {
837 // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
838 // intrinsics, `index_` is not shifted by a scale factor of 2
839 // (as in the case of ArrayGet), as it is actually an offset
840 // to an object field within an object.
841 DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
842 DCHECK(instruction_->GetLocations()->Intrinsified());
843 DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
844 (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile) ||
845 (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetReference) ||
846 (instruction_->AsInvoke()->GetIntrinsic() ==
847 Intrinsics::kJdkUnsafeGetReferenceVolatile) ||
848 (instruction_->AsInvoke()->GetIntrinsic() ==
849 Intrinsics::kJdkUnsafeGetReferenceAcquire))
850 << instruction_->AsInvoke()->GetIntrinsic();
851 DCHECK_EQ(offset_, 0U);
852 DCHECK(index_.IsRegisterPair());
853 // UnsafeGet's offset location is a register pair; the low
854 // part contains the correct offset.
855 index = index_.ToLow();
856 }
857 }
858
859 // We're moving two or three locations to locations that could
860 // overlap, so we need a parallel move resolver.
861 InvokeRuntimeCallingConvention calling_convention;
862 HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
863 parallel_move.AddMove(ref_,
864 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
865 DataType::Type::kReference,
866 nullptr);
867 parallel_move.AddMove(obj_,
868 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
869 DataType::Type::kReference,
870 nullptr);
871 if (index.IsValid()) {
872 parallel_move.AddMove(index,
873 Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
874 DataType::Type::kInt32,
875 nullptr);
876 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
877 } else {
878 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
879 __ movl(calling_convention.GetRegisterAt(2), Immediate(offset_));
880 }
881 x86_codegen->InvokeRuntime(kQuickReadBarrierSlow, instruction_, instruction_->GetDexPc(), this);
882 CheckEntrypointTypes<
883 kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
884 x86_codegen->Move32(out_, Location::RegisterLocation(EAX));
885
886 RestoreLiveRegisters(codegen, locations);
887 __ jmp(GetExitLabel());
888 }
889
890 const char* GetDescription() const override { return "ReadBarrierForHeapReferenceSlowPathX86"; }
891
892 private:
893 Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
894 size_t ref = static_cast<int>(ref_.AsRegister<Register>());
895 size_t obj = static_cast<int>(obj_.AsRegister<Register>());
896 for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
897 if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
898 return static_cast<Register>(i);
899 }
900 }
901 // We shall never fail to find a free caller-save register, as
902 // there are more than two core caller-save registers on x86
903 // (meaning it is possible to find one which is different from
904 // `ref` and `obj`).
905 DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
906 LOG(FATAL) << "Could not find a free caller-save register";
907 UNREACHABLE();
908 }
909
910 const Location out_;
911 const Location ref_;
912 const Location obj_;
913 const uint32_t offset_;
914 // An additional location containing an index to an array.
915 // Only used for HArrayGet and the UnsafeGetObject &
916 // UnsafeGetObjectVolatile intrinsics.
917 const Location index_;
918
919 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86);
920 };
921
922 // Slow path generating a read barrier for a GC root.
923 class ReadBarrierForRootSlowPathX86 : public SlowPathCode {
924 public:
925 ReadBarrierForRootSlowPathX86(HInstruction* instruction, Location out, Location root)
926 : SlowPathCode(instruction), out_(out), root_(root) {
927 }
928
929 void EmitNativeCode(CodeGenerator* codegen) override {
930 DCHECK(codegen->EmitReadBarrier());
931 LocationSummary* locations = instruction_->GetLocations();
932 Register reg_out = out_.AsRegister<Register>();
933 DCHECK(locations->CanCall());
934 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
935 DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
936 << "Unexpected instruction in read barrier for GC root slow path: "
937 << instruction_->DebugName();
938
939 __ Bind(GetEntryLabel());
940 SaveLiveRegisters(codegen, locations);
941
942 InvokeRuntimeCallingConvention calling_convention;
943 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
944 x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
945 x86_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
946 instruction_,
947 instruction_->GetDexPc(),
948 this);
949 CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
950 x86_codegen->Move32(out_, Location::RegisterLocation(EAX));
951
952 RestoreLiveRegisters(codegen, locations);
953 __ jmp(GetExitLabel());
954 }
955
956 const char* GetDescription() const override { return "ReadBarrierForRootSlowPathX86"; }
957
958 private:
959 const Location out_;
960 const Location root_;
961
962 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86);
963 };
964
965 class MethodEntryExitHooksSlowPathX86 : public SlowPathCode {
966 public:
967 explicit MethodEntryExitHooksSlowPathX86(HInstruction* instruction) : SlowPathCode(instruction) {}
968
969 void EmitNativeCode(CodeGenerator* codegen) override {
970 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
971 LocationSummary* locations = instruction_->GetLocations();
972 QuickEntrypointEnum entry_point =
973 (instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook;
974 __ Bind(GetEntryLabel());
975 SaveLiveRegisters(codegen, locations);
976 if (instruction_->IsMethodExitHook()) {
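// Pass the frame size to the method exit hook entrypoint in EBX.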
977 __ movl(EBX, Immediate(codegen->GetFrameSize()));
978 }
979 x86_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this);
980 RestoreLiveRegisters(codegen, locations);
981 __ jmp(GetExitLabel());
982 }
983
984 const char* GetDescription() const override {
985 return "MethodEntryExitHooksSlowPath";
986 }
987
988 private:
989 DISALLOW_COPY_AND_ASSIGN(MethodEntryExitHooksSlowPathX86);
990 };
991
992 class CompileOptimizedSlowPathX86 : public SlowPathCode {
993 public:
994 CompileOptimizedSlowPathX86(HSuspendCheck* suspend_check, uint32_t counter_address)
995 : SlowPathCode(suspend_check),
996 counter_address_(counter_address) {}
997
998 void EmitNativeCode(CodeGenerator* codegen) override {
999 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
1000 __ Bind(GetEntryLabel());
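// Re-arm the baseline hotness counter first, so this slow path is not immediately re-entered.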
1001 __ movw(Address::Absolute(counter_address_), Immediate(ProfilingInfo::GetOptimizeThreshold()));
1002 if (instruction_ != nullptr) {
1003 // Only saves full width XMM for SIMD.
1004 SaveLiveRegisters(codegen, instruction_->GetLocations());
1005 }
1006 x86_codegen->GenerateInvokeRuntime(
1007 GetThreadOffset<kX86PointerSize>(kQuickCompileOptimized).Int32Value());
1008 if (instruction_ != nullptr) {
1009 // Only restores full width XMM for SIMD.
1010 RestoreLiveRegisters(codegen, instruction_->GetLocations());
1011 }
1012 __ jmp(GetExitLabel());
1013 }
1014
1015 const char* GetDescription() const override {
1016 return "CompileOptimizedSlowPath";
1017 }
1018
1019 private:
1020 uint32_t counter_address_;
1021
1022 DISALLOW_COPY_AND_ASSIGN(CompileOptimizedSlowPathX86);
1023 };
1024
1025 #undef __
1026 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
1027 #define __ down_cast<X86Assembler*>(GetAssembler())-> // NOLINT
1028
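// Maps a HIR condition to the corresponding x86 signed condition code.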
1029 inline Condition X86Condition(IfCondition cond) {
1030 switch (cond) {
1031 case kCondEQ: return kEqual;
1032 case kCondNE: return kNotEqual;
1033 case kCondLT: return kLess;
1034 case kCondLE: return kLessEqual;
1035 case kCondGT: return kGreater;
1036 case kCondGE: return kGreaterEqual;
1037 case kCondB: return kBelow;
1038 case kCondBE: return kBelowEqual;
1039 case kCondA: return kAbove;
1040 case kCondAE: return kAboveEqual;
1041 }
1042 LOG(FATAL) << "Unreachable";
1043 UNREACHABLE();
1044 }
1045
1046 // Maps signed condition to unsigned condition and FP condition to x86 name.
1047 inline Condition X86UnsignedOrFPCondition(IfCondition cond) {
1048 switch (cond) {
1049 case kCondEQ: return kEqual;
1050 case kCondNE: return kNotEqual;
1051 // Signed to unsigned, and FP to x86 name.
1052 case kCondLT: return kBelow;
1053 case kCondLE: return kBelowEqual;
1054 case kCondGT: return kAbove;
1055 case kCondGE: return kAboveEqual;
1056 // Unsigned remain unchanged.
1057 case kCondB: return kBelow;
1058 case kCondBE: return kBelowEqual;
1059 case kCondA: return kAbove;
1060 case kCondAE: return kAboveEqual;
1061 }
1062 LOG(FATAL) << "Unreachable";
1063 UNREACHABLE();
1064 }
1065
1066 void CodeGeneratorX86::DumpCoreRegister(std::ostream& stream, int reg) const {
1067 stream << Register(reg);
1068 }
1069
1070 void CodeGeneratorX86::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
1071 stream << XmmRegister(reg);
1072 }
1073
1074 const X86InstructionSetFeatures& CodeGeneratorX86::GetInstructionSetFeatures() const {
1075 return *GetCompilerOptions().GetInstructionSetFeatures()->AsX86InstructionSetFeatures();
1076 }
1077
1078 size_t CodeGeneratorX86::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
1079 __ movl(Address(ESP, stack_index), static_cast<Register>(reg_id));
1080 return kX86WordSize;
1081 }
1082
1083 size_t CodeGeneratorX86::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
1084 __ movl(static_cast<Register>(reg_id), Address(ESP, stack_index));
1085 return kX86WordSize;
1086 }
1087
1088 size_t CodeGeneratorX86::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1089 if (GetGraph()->HasSIMD()) {
1090 __ movups(Address(ESP, stack_index), XmmRegister(reg_id));
1091 } else {
1092 __ movsd(Address(ESP, stack_index), XmmRegister(reg_id));
1093 }
1094 return GetSlowPathFPWidth();
1095 }
1096
1097 size_t CodeGeneratorX86::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1098 if (GetGraph()->HasSIMD()) {
1099 __ movups(XmmRegister(reg_id), Address(ESP, stack_index));
1100 } else {
1101 __ movsd(XmmRegister(reg_id), Address(ESP, stack_index));
1102 }
1103 return GetSlowPathFPWidth();
1104 }
1105
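// Calls the given Quick entrypoint through the Thread's entrypoint table (fs segment) and records a stack map when the entrypoint requires one.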
1106 void CodeGeneratorX86::InvokeRuntime(QuickEntrypointEnum entrypoint,
1107 HInstruction* instruction,
1108 uint32_t dex_pc,
1109 SlowPathCode* slow_path) {
1110 ValidateInvokeRuntime(entrypoint, instruction, slow_path);
1111 GenerateInvokeRuntime(GetThreadOffset<kX86PointerSize>(entrypoint).Int32Value());
1112 if (EntrypointRequiresStackMap(entrypoint)) {
1113 RecordPcInfo(instruction, dex_pc, slow_path);
1114 }
1115 }
1116
1117 void CodeGeneratorX86::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
1118 HInstruction* instruction,
1119 SlowPathCode* slow_path) {
1120 ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
1121 GenerateInvokeRuntime(entry_point_offset);
1122 }
1123
1124 void CodeGeneratorX86::GenerateInvokeRuntime(int32_t entry_point_offset) {
1125 __ fs()->call(Address::Absolute(entry_point_offset));
1126 }
1127
1128 namespace detail {
1129
1130 // Mark which intrinsics we don't have handcrafted code for.
1131 template <Intrinsics T>
1132 struct IsUnimplemented {
1133 bool is_unimplemented = false;
1134 };
1135
1136 #define TRUE_OVERRIDE(Name) \
1137 template <> \
1138 struct IsUnimplemented<Intrinsics::k##Name> { \
1139 bool is_unimplemented = true; \
1140 };
1141 UNIMPLEMENTED_INTRINSIC_LIST_X86(TRUE_OVERRIDE)
1142 #undef TRUE_OVERRIDE
1143
1144 static constexpr bool kIsIntrinsicUnimplemented[] = {
1145 false, // kNone
1146 #define IS_UNIMPLEMENTED(Intrinsic, ...) \
1147 IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented,
1148 ART_INTRINSICS_LIST(IS_UNIMPLEMENTED)
1149 #undef IS_UNIMPLEMENTED
1150 };
1151
1152 } // namespace detail
1153
1154 CodeGeneratorX86::CodeGeneratorX86(HGraph* graph,
1155 const CompilerOptions& compiler_options,
1156 OptimizingCompilerStats* stats)
1157 : CodeGenerator(graph,
1158 kNumberOfCpuRegisters,
1159 kNumberOfXmmRegisters,
1160 kNumberOfRegisterPairs,
1161 ComputeRegisterMask(kCoreCalleeSaves, arraysize(kCoreCalleeSaves))
1162 | (1 << kFakeReturnRegister),
1163 0,
1164 compiler_options,
1165 stats,
1166 ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)),
1167 block_labels_(nullptr),
1168 location_builder_(graph, this),
1169 instruction_visitor_(graph, this),
1170 move_resolver_(graph->GetAllocator(), this),
1171 assembler_(graph->GetAllocator(),
1172 compiler_options.GetInstructionSetFeatures()->AsX86InstructionSetFeatures()),
1173 boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1174 method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1175 boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1176 app_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1177 type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1178 public_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1179 package_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1180 boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1181 string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1182 boot_image_jni_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1183 boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1184 jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1185 jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1186 constant_area_start_(-1),
1187 fixups_to_jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1188 method_address_offset_(std::less<uint32_t>(),
1189 graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
1190 // Use a fake return address register to mimic Quick.
1191 AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
1192 }
1193
1194 void CodeGeneratorX86::SetupBlockedRegisters() const {
1195 // Stack register is always reserved.
1196 blocked_core_registers_[ESP] = true;
1197 }
1198
1199 InstructionCodeGeneratorX86::InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen)
1200 : InstructionCodeGenerator(graph, codegen),
1201 assembler_(codegen->GetAssembler()),
1202 codegen_(codegen) {}
1203
1204 static dwarf::Reg DWARFReg(Register reg) {
1205 return dwarf::Reg::X86Core(static_cast<int>(reg));
1206 }
1207
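// Sets the location of the return value input according to the x86 managed calling convention: core values in EAX (wide values in the EAX/EDX pair), FP values in XMM0.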
1208 void SetInForReturnValue(HInstruction* ret, LocationSummary* locations) {
1209 switch (ret->InputAt(0)->GetType()) {
1210 case DataType::Type::kReference:
1211 case DataType::Type::kBool:
1212 case DataType::Type::kUint8:
1213 case DataType::Type::kInt8:
1214 case DataType::Type::kUint16:
1215 case DataType::Type::kInt16:
1216 case DataType::Type::kInt32:
1217 locations->SetInAt(0, Location::RegisterLocation(EAX));
1218 break;
1219
1220 case DataType::Type::kInt64:
1221 locations->SetInAt(0, Location::RegisterPairLocation(EAX, EDX));
1222 break;
1223
1224 case DataType::Type::kFloat32:
1225 case DataType::Type::kFloat64:
1226 locations->SetInAt(0, Location::FpuRegisterLocation(XMM0));
1227 break;
1228
1229 case DataType::Type::kVoid:
1230 locations->SetInAt(0, Location::NoLocation());
1231 break;
1232
1233 default:
1234 LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType();
1235 }
1236 }
1237
1238 void LocationsBuilderX86::VisitMethodExitHook(HMethodExitHook* method_hook) {
1239 LocationSummary* locations = new (GetGraph()->GetAllocator())
1240 LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
1241 SetInForReturnValue(method_hook, locations);
1242 // We use rdtsc to obtain a timestamp for tracing. rdtsc returns the results in EAX + EDX.
1243 locations->AddTemp(Location::RegisterLocation(EAX));
1244 locations->AddTemp(Location::RegisterLocation(EDX));
1245 // An additional temporary register to hold the address at which the timestamp is stored.
1246 locations->AddTemp(Location::RequiresRegister());
1247 }
1248
1249 void InstructionCodeGeneratorX86::GenerateMethodEntryExitHook(HInstruction* instruction) {
1250 SlowPathCode* slow_path =
1251 new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathX86(instruction);
1252 codegen_->AddSlowPath(slow_path);
1253 LocationSummary* locations = instruction->GetLocations();
1254
1255 if (instruction->IsMethodExitHook()) {
1256 // Check if we are required to check whether the caller needs a deoptimization. Strictly
1257 // speaking it would be sufficient to check whether the CheckCallerForDeopt bit is set, but it
1258 // is faster to check for any non-zero value. The kCHA bit isn't used in debuggable runtimes,
1259 // as CHA optimization is disabled there. The other bit is used when this method itself
1260 // requires a deoptimization due to redefinition. So it is safe to just check for non-zero here.
1261 __ cmpl(Address(ESP, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
1262 __ j(kNotEqual, slow_path->GetEntryLabel());
1263 }
1264
1265 uint64_t address = reinterpret_cast64<uint64_t>(Runtime::Current()->GetInstrumentation());
1266 MemberOffset offset = instruction->IsMethodExitHook() ?
1267 instrumentation::Instrumentation::HaveMethodExitListenersOffset() :
1268 instrumentation::Instrumentation::HaveMethodEntryListenersOffset();
1269 __ cmpb(Address::Absolute(address + offset.Int32Value()),
1270 Immediate(instrumentation::Instrumentation::kFastTraceListeners));
1271 // Check if there are any trace method entry / exit listeners. If not, continue.
1272 __ j(kLess, slow_path->GetExitLabel());
1273 // Check if there are any slow (jvmti / trace with thread cpu time) method entry / exit listeners.
1274 // If yes, just take the slow path.
1275 __ j(kGreater, slow_path->GetEntryLabel());
1276
1277 // For entry_addr use the first temp that isn't EAX or EDX. We need this after
1278 // rdtsc which returns values in EAX + EDX.
1279 Register entry_addr = locations->GetTemp(2).AsRegister<Register>();
1280 Register index = locations->GetTemp(1).AsRegister<Register>();
1281
1282 // Check if there is space in the buffer for a new entry; if not, take the slow path.
1283 uint32_t trace_buffer_ptr = Thread::TraceBufferPtrOffset<kX86PointerSize>().Int32Value();
1284 uint64_t trace_buffer_index_offset =
1285 Thread::TraceBufferIndexOffset<kX86PointerSize>().Int32Value();
1286
1287 __ fs()->movl(index, Address::Absolute(trace_buffer_index_offset));
1288 __ subl(index, Immediate(kNumEntriesForWallClock));
1289 __ j(kLess, slow_path->GetEntryLabel());
1290
1291 // Update the index in the `Thread`.
1292 __ fs()->movl(Address::Absolute(trace_buffer_index_offset), index);
1293 // Calculate the entry address in the buffer.
1294 // entry_addr = base_addr + sizeof(void*) * index
1295 __ fs()->movl(entry_addr, Address::Absolute(trace_buffer_ptr));
1296 __ leal(entry_addr, Address(entry_addr, index, TIMES_4, 0));
1297
1298 // Record method pointer and trace action.
1299 Register method = index;
1300 __ movl(method, Address(ESP, kCurrentMethodStackOffset));
1301 // Use the last two bits to encode the trace method action. For MethodEntry the action is 0,
1302 // so there is no need to set the bits since they are 0 already.
1303 if (instruction->IsMethodExitHook()) {
1304 DCHECK_GE(ArtMethod::Alignment(kRuntimePointerSize), static_cast<size_t>(4));
1305 static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodEnter) == 0);
1306 static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodExit) == 1);
1307 __ orl(method, Immediate(enum_cast<int32_t>(TraceAction::kTraceMethodExit)));
1308 }
1309 __ movl(Address(entry_addr, kMethodOffsetInBytes), method);
1310   // Get the timestamp. rdtsc returns the timestamp in EAX + EDX.
1311 __ rdtsc();
1312 __ movl(Address(entry_addr, kTimestampOffsetInBytes), EAX);
1313 __ movl(Address(entry_addr, kHighTimestampOffsetInBytes), EDX);
1314 __ Bind(slow_path->GetExitLabel());
1315 }
1316
1317 void InstructionCodeGeneratorX86::VisitMethodExitHook(HMethodExitHook* instruction) {
1318 DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
1319 DCHECK(codegen_->RequiresCurrentMethod());
1320 GenerateMethodEntryExitHook(instruction);
1321 }
1322
1323 void LocationsBuilderX86::VisitMethodEntryHook(HMethodEntryHook* method_hook) {
1324 LocationSummary* locations = new (GetGraph()->GetAllocator())
1325 LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
1326 // We use rdtsc to obtain a timestamp for tracing. rdtsc returns the results in EAX + EDX.
1327 locations->AddTemp(Location::RegisterLocation(EAX));
1328 locations->AddTemp(Location::RegisterLocation(EDX));
1329   // An additional temporary register to hold the address at which to store the timestamp counter.
1330 locations->AddTemp(Location::RequiresRegister());
1331 }
1332
1333 void InstructionCodeGeneratorX86::VisitMethodEntryHook(HMethodEntryHook* instruction) {
1334 DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
1335 DCHECK(codegen_->RequiresCurrentMethod());
1336 GenerateMethodEntryExitHook(instruction);
1337 }
1338
1339 void CodeGeneratorX86::MaybeIncrementHotness(HSuspendCheck* suspend_check, bool is_frame_entry) {
1340 if (GetCompilerOptions().CountHotnessInCompiledCode()) {
1341 Register reg = EAX;
1342 if (is_frame_entry) {
1343 reg = kMethodRegisterArgument;
1344 } else {
1345 __ pushl(EAX);
1346 __ cfi().AdjustCFAOffset(4);
1347 __ movl(EAX, Address(ESP, kX86WordSize));
1348 }
1349 NearLabel overflow;
1350 __ cmpw(Address(reg, ArtMethod::HotnessCountOffset().Int32Value()),
1351 Immediate(interpreter::kNterpHotnessValue));
1352 __ j(kEqual, &overflow);
1353 __ addw(Address(reg, ArtMethod::HotnessCountOffset().Int32Value()), Immediate(-1));
1354 __ Bind(&overflow);
1355 if (!is_frame_entry) {
1356 __ popl(EAX);
1357 __ cfi().AdjustCFAOffset(-4);
1358 }
1359 }
1360
1361 if (GetGraph()->IsCompilingBaseline() &&
1362 GetGraph()->IsUsefulOptimizing() &&
1363 !Runtime::Current()->IsAotCompiler()) {
1364 ProfilingInfo* info = GetGraph()->GetProfilingInfo();
1365 DCHECK(info != nullptr);
1366 uint32_t address = reinterpret_cast32<uint32_t>(info) +
1367 ProfilingInfo::BaselineHotnessCountOffset().Int32Value();
1368 DCHECK(!HasEmptyFrame());
1369 SlowPathCode* slow_path =
1370 new (GetScopedAllocator()) CompileOptimizedSlowPathX86(suspend_check, address);
1371 AddSlowPath(slow_path);
1372     // With multiple threads, this can overflow. That is OK, as we will eventually see the
1373     // counter reach 0. Also, at this point we have no register available to inspect the
1374     // counter directly.
1375 __ addw(Address::Absolute(address), Immediate(-1));
1376 __ j(kEqual, slow_path->GetEntryLabel());
1377 __ Bind(slow_path->GetExitLabel());
1378 }
1379 }
1380
1381 void CodeGeneratorX86::GenerateFrameEntry() {
1382 __ cfi().SetCurrentCFAOffset(kX86WordSize); // return address
1383
1384 // Check if we need to generate the clinit check. We will jump to the
1385 // resolution stub if the class is not initialized and the executing thread is
1386 // not the thread initializing it.
1387 // We do this before constructing the frame to get the correct stack trace if
1388 // an exception is thrown.
1389 if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) {
1390 NearLabel continue_execution, resolution;
1391     // We'll use EBP as a temporary.
1392 __ pushl(EBP);
1393 __ cfi().AdjustCFAOffset(4);
1394 // Check if we're visibly initialized.
1395
1396 // We don't emit a read barrier here to save on code size. We rely on the
1397 // resolution trampoline to do a suspend check before re-entering this code.
1398 __ movl(EBP, Address(kMethodRegisterArgument, ArtMethod::DeclaringClassOffset().Int32Value()));
1399 __ cmpb(Address(EBP, kClassStatusByteOffset), Immediate(kShiftedVisiblyInitializedValue));
1400 __ j(kAboveEqual, &continue_execution);
1401
1402 // Check if we're initializing and the thread initializing is the one
1403 // executing the code.
1404 __ cmpb(Address(EBP, kClassStatusByteOffset), Immediate(kShiftedInitializingValue));
1405 __ j(kBelow, &resolution);
1406
1407 __ movl(EBP, Address(EBP, mirror::Class::ClinitThreadIdOffset().Int32Value()));
1408 __ fs()->cmpl(EBP, Address::Absolute(Thread::TidOffset<kX86PointerSize>().Int32Value()));
1409 __ j(kEqual, &continue_execution);
1410 __ Bind(&resolution);
1411
1412 __ popl(EBP);
1413 __ cfi().AdjustCFAOffset(-4);
1414 // Jump to the resolution stub.
1415 ThreadOffset32 entrypoint_offset =
1416 GetThreadOffset<kX86PointerSize>(kQuickQuickResolutionTrampoline);
1417 __ fs()->jmp(Address::Absolute(entrypoint_offset));
1418
1419 __ Bind(&continue_execution);
1420 __ cfi().AdjustCFAOffset(4); // Undo the `-4` adjustment above. We get here with EBP pushed.
1421 __ popl(EBP);
1422 __ cfi().AdjustCFAOffset(-4);
1423 }
1424
1425 __ Bind(&frame_entry_label_);
1426 bool skip_overflow_check =
1427 IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86);
1428 DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
1429
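  // Note (descriptive, based on the DCHECK above that implicit stack overflow checks are enabled):
  // the probe below reads from `reserved_bytes` below ESP, so if this method's frame would not fit,
  // the load lands in the protected stack region and faults eagerly here; RecordPcInfo lets the
  // runtime map that faulting pc back to this method.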
1430 if (!skip_overflow_check) {
1431 size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86);
1432 __ testl(EAX, Address(ESP, -static_cast<int32_t>(reserved_bytes)));
1433 RecordPcInfo(nullptr, 0);
1434 }
1435
1436 if (!HasEmptyFrame()) {
1437 // Make sure the frame size isn't unreasonably large.
1438 DCHECK_LE(GetFrameSize(), GetMaximumFrameSize());
1439
1440 for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
1441 Register reg = kCoreCalleeSaves[i];
1442 if (allocated_registers_.ContainsCoreRegister(reg)) {
1443 __ pushl(reg);
1444 __ cfi().AdjustCFAOffset(kX86WordSize);
1445 __ cfi().RelOffset(DWARFReg(reg), 0);
1446 }
1447 }
1448
1449 int adjust = GetFrameSize() - FrameEntrySpillSize();
1450 IncreaseFrame(adjust);
1451 // Save the current method if we need it. Note that we do not
1452 // do this in HCurrentMethod, as the instruction might have been removed
1453 // in the SSA graph.
1454 if (RequiresCurrentMethod()) {
1455 __ movl(Address(ESP, kCurrentMethodStackOffset), kMethodRegisterArgument);
1456 }
1457
1458 if (GetGraph()->HasShouldDeoptimizeFlag()) {
1459 // Initialize should_deoptimize flag to 0.
1460 __ movl(Address(ESP, GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
1461 }
1462 }
1463
1464 MaybeIncrementHotness(/* suspend_check= */ nullptr, /* is_frame_entry= */ true);
1465 }
1466
1467 void CodeGeneratorX86::GenerateFrameExit() {
1468 __ cfi().RememberState();
1469 if (!HasEmptyFrame()) {
1470 int adjust = GetFrameSize() - FrameEntrySpillSize();
1471 DecreaseFrame(adjust);
1472
1473 for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
1474 Register reg = kCoreCalleeSaves[i];
1475 if (allocated_registers_.ContainsCoreRegister(reg)) {
1476 __ popl(reg);
1477 __ cfi().AdjustCFAOffset(-static_cast<int>(kX86WordSize));
1478 __ cfi().Restore(DWARFReg(reg));
1479 }
1480 }
1481 }
1482 __ ret();
1483 __ cfi().RestoreState();
1484 __ cfi().DefCFAOffset(GetFrameSize());
1485 }
1486
1487 void CodeGeneratorX86::Bind(HBasicBlock* block) {
1488 __ Bind(GetLabelOf(block));
1489 }
1490
1491 Location InvokeDexCallingConventionVisitorX86::GetReturnLocation(DataType::Type type) const {
1492 switch (type) {
1493 case DataType::Type::kReference:
1494 case DataType::Type::kBool:
1495 case DataType::Type::kUint8:
1496 case DataType::Type::kInt8:
1497 case DataType::Type::kUint16:
1498 case DataType::Type::kInt16:
1499 case DataType::Type::kUint32:
1500 case DataType::Type::kInt32:
1501 return Location::RegisterLocation(EAX);
1502
1503 case DataType::Type::kUint64:
1504 case DataType::Type::kInt64:
1505 return Location::RegisterPairLocation(EAX, EDX);
1506
1507 case DataType::Type::kVoid:
1508 return Location::NoLocation();
1509
1510 case DataType::Type::kFloat64:
1511 case DataType::Type::kFloat32:
1512 return Location::FpuRegisterLocation(XMM0);
1513 }
1514 }
1515
1516 Location InvokeDexCallingConventionVisitorX86::GetMethodLocation() const {
1517 return Location::RegisterLocation(kMethodRegisterArgument);
1518 }
1519
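// Assign the next argument location under the managed (dex) calling convention: hand out the
// convention's remaining core/FP argument registers (register pairs for int64) and fall back to
// stack slots once those registers are exhausted.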
1520 Location InvokeDexCallingConventionVisitorX86::GetNextLocation(DataType::Type type) {
1521 switch (type) {
1522 case DataType::Type::kReference:
1523 case DataType::Type::kBool:
1524 case DataType::Type::kUint8:
1525 case DataType::Type::kInt8:
1526 case DataType::Type::kUint16:
1527 case DataType::Type::kInt16:
1528 case DataType::Type::kInt32: {
1529 uint32_t index = gp_index_++;
1530 stack_index_++;
1531 if (index < calling_convention.GetNumberOfRegisters()) {
1532 return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
1533 } else {
1534 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
1535 }
1536 }
1537
1538 case DataType::Type::kInt64: {
1539 uint32_t index = gp_index_;
1540 gp_index_ += 2;
1541 stack_index_ += 2;
1542 if (index + 1 < calling_convention.GetNumberOfRegisters()) {
1543 X86ManagedRegister pair = X86ManagedRegister::FromRegisterPair(
1544 calling_convention.GetRegisterPairAt(index));
1545 return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh());
1546 } else {
1547 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
1548 }
1549 }
1550
1551 case DataType::Type::kFloat32: {
1552 uint32_t index = float_index_++;
1553 stack_index_++;
1554 if (index < calling_convention.GetNumberOfFpuRegisters()) {
1555 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
1556 } else {
1557 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
1558 }
1559 }
1560
1561 case DataType::Type::kFloat64: {
1562 uint32_t index = float_index_++;
1563 stack_index_ += 2;
1564 if (index < calling_convention.GetNumberOfFpuRegisters()) {
1565 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
1566 } else {
1567 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
1568 }
1569 }
1570
1571 case DataType::Type::kUint32:
1572 case DataType::Type::kUint64:
1573 case DataType::Type::kVoid:
1574 LOG(FATAL) << "Unexpected parameter type " << type;
1575 UNREACHABLE();
1576 }
1577 return Location::NoLocation();
1578 }
1579
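// For @CriticalNative calls the native ABI is used, so every argument here is assigned a stack
// slot; 64-bit values occupy two consecutive slots. During register allocation the location is
// relaxed to Any().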
1580 Location CriticalNativeCallingConventionVisitorX86::GetNextLocation(DataType::Type type) {
1581 DCHECK_NE(type, DataType::Type::kReference);
1582
1583 Location location;
1584 if (DataType::Is64BitType(type)) {
1585 location = Location::DoubleStackSlot(stack_offset_);
1586 stack_offset_ += 2 * kFramePointerSize;
1587 } else {
1588 location = Location::StackSlot(stack_offset_);
1589 stack_offset_ += kFramePointerSize;
1590 }
1591 if (for_register_allocation_) {
1592 location = Location::Any();
1593 }
1594 return location;
1595 }
1596
1597 Location CriticalNativeCallingConventionVisitorX86::GetReturnLocation(DataType::Type type) const {
1598 // We perform conversion to the managed ABI return register after the call if needed.
1599 InvokeDexCallingConventionVisitorX86 dex_calling_convention;
1600 return dex_calling_convention.GetReturnLocation(type);
1601 }
1602
1603 Location CriticalNativeCallingConventionVisitorX86::GetMethodLocation() const {
1604 // Pass the method in the hidden argument EAX.
1605 return Location::RegisterLocation(EAX);
1606 }
1607
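// Move a 32-bit value between any two of: core register, XMM register, constant and stack slot.
// The stack-slot-to-stack-slot case uses a push/pop pair to avoid needing a scratch register.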
1608 void CodeGeneratorX86::Move32(Location destination, Location source) {
1609 if (source.Equals(destination)) {
1610 return;
1611 }
1612 if (destination.IsRegister()) {
1613 if (source.IsRegister()) {
1614 __ movl(destination.AsRegister<Register>(), source.AsRegister<Register>());
1615 } else if (source.IsFpuRegister()) {
1616 __ movd(destination.AsRegister<Register>(), source.AsFpuRegister<XmmRegister>());
1617 } else if (source.IsConstant()) {
1618 int32_t value = GetInt32ValueOf(source.GetConstant());
1619 __ movl(destination.AsRegister<Register>(), Immediate(value));
1620 } else {
1621 DCHECK(source.IsStackSlot());
1622 __ movl(destination.AsRegister<Register>(), Address(ESP, source.GetStackIndex()));
1623 }
1624 } else if (destination.IsFpuRegister()) {
1625 if (source.IsRegister()) {
1626 __ movd(destination.AsFpuRegister<XmmRegister>(), source.AsRegister<Register>());
1627 } else if (source.IsFpuRegister()) {
1628 __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
1629 } else {
1630 DCHECK(source.IsStackSlot());
1631 __ movss(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
1632 }
1633 } else {
1634 DCHECK(destination.IsStackSlot()) << destination;
1635 if (source.IsRegister()) {
1636 __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegister<Register>());
1637 } else if (source.IsFpuRegister()) {
1638 __ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
1639 } else if (source.IsConstant()) {
1640 HConstant* constant = source.GetConstant();
1641 int32_t value = GetInt32ValueOf(constant);
1642 __ movl(Address(ESP, destination.GetStackIndex()), Immediate(value));
1643 } else {
1644 DCHECK(source.IsStackSlot());
1645 __ pushl(Address(ESP, source.GetStackIndex()));
1646 __ popl(Address(ESP, destination.GetStackIndex()));
1647 }
1648 }
1649 }
1650
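// Move a 64-bit value between register pairs, XMM registers, constants and double stack slots.
// A register-pair-to-XMM move is done by pushing both halves and loading them back with movsd.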
1651 void CodeGeneratorX86::Move64(Location destination, Location source) {
1652 if (source.Equals(destination)) {
1653 return;
1654 }
1655 if (destination.IsRegisterPair()) {
1656 if (source.IsRegisterPair()) {
1657 EmitParallelMoves(
1658 Location::RegisterLocation(source.AsRegisterPairHigh<Register>()),
1659 Location::RegisterLocation(destination.AsRegisterPairHigh<Register>()),
1660 DataType::Type::kInt32,
1661 Location::RegisterLocation(source.AsRegisterPairLow<Register>()),
1662 Location::RegisterLocation(destination.AsRegisterPairLow<Register>()),
1663 DataType::Type::kInt32);
1664 } else if (source.IsFpuRegister()) {
1665 XmmRegister src_reg = source.AsFpuRegister<XmmRegister>();
1666 __ movd(destination.AsRegisterPairLow<Register>(), src_reg);
1667 __ psrlq(src_reg, Immediate(32));
1668 __ movd(destination.AsRegisterPairHigh<Register>(), src_reg);
1669 } else {
1670 // No conflict possible, so just do the moves.
1671 DCHECK(source.IsDoubleStackSlot());
1672 __ movl(destination.AsRegisterPairLow<Register>(), Address(ESP, source.GetStackIndex()));
1673 __ movl(destination.AsRegisterPairHigh<Register>(),
1674 Address(ESP, source.GetHighStackIndex(kX86WordSize)));
1675 }
1676 } else if (destination.IsFpuRegister()) {
1677 if (source.IsFpuRegister()) {
1678 __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
1679 } else if (source.IsDoubleStackSlot()) {
1680 __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
1681 } else if (source.IsRegisterPair()) {
1682 size_t elem_size = DataType::Size(DataType::Type::kInt32);
1683 // Push the 2 source registers to the stack.
1684 __ pushl(source.AsRegisterPairHigh<Register>());
1685 __ cfi().AdjustCFAOffset(elem_size);
1686 __ pushl(source.AsRegisterPairLow<Register>());
1687 __ cfi().AdjustCFAOffset(elem_size);
1688 __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
1689 // And remove the temporary stack space we allocated.
1690 DecreaseFrame(2 * elem_size);
1691 } else {
1692 LOG(FATAL) << "Unimplemented";
1693 }
1694 } else {
1695 DCHECK(destination.IsDoubleStackSlot()) << destination;
1696 if (source.IsRegisterPair()) {
1697 // No conflict possible, so just do the moves.
1698 __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegisterPairLow<Register>());
1699 __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
1700 source.AsRegisterPairHigh<Register>());
1701 } else if (source.IsFpuRegister()) {
1702 __ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
1703 } else if (source.IsConstant()) {
1704 HConstant* constant = source.GetConstant();
1705 DCHECK(constant->IsLongConstant() || constant->IsDoubleConstant());
1706 int64_t value = GetInt64ValueOf(constant);
1707 __ movl(Address(ESP, destination.GetStackIndex()), Immediate(Low32Bits(value)));
1708 __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
1709 Immediate(High32Bits(value)));
1710 } else {
1711 DCHECK(source.IsDoubleStackSlot()) << source;
1712 EmitParallelMoves(
1713 Location::StackSlot(source.GetStackIndex()),
1714 Location::StackSlot(destination.GetStackIndex()),
1715 DataType::Type::kInt32,
1716 Location::StackSlot(source.GetHighStackIndex(kX86WordSize)),
1717 Location::StackSlot(destination.GetHighStackIndex(kX86WordSize)),
1718 DataType::Type::kInt32);
1719 }
1720 }
1721 }
1722
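// Helper to build an Address that may or may not use an index register: a plain
// base + displacement address when no index is given, base + index * scale + displacement otherwise.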
1723 static Address CreateAddress(Register base,
1724 Register index = Register::kNoRegister,
1725 ScaleFactor scale = TIMES_1,
1726 int32_t disp = 0) {
1727 if (index == Register::kNoRegister) {
1728 return Address(base, disp);
1729 }
1730
1731 return Address(base, index, scale, disp);
1732 }
1733
1734 void CodeGeneratorX86::LoadFromMemoryNoBarrier(DataType::Type dst_type,
1735 Location dst,
1736 Address src,
1737 HInstruction* instr,
1738 XmmRegister temp,
1739 bool is_atomic_load) {
1740 switch (dst_type) {
1741 case DataType::Type::kBool:
1742 case DataType::Type::kUint8:
1743 __ movzxb(dst.AsRegister<Register>(), src);
1744 break;
1745 case DataType::Type::kInt8:
1746 __ movsxb(dst.AsRegister<Register>(), src);
1747 break;
1748 case DataType::Type::kInt16:
1749 __ movsxw(dst.AsRegister<Register>(), src);
1750 break;
1751 case DataType::Type::kUint16:
1752 __ movzxw(dst.AsRegister<Register>(), src);
1753 break;
1754 case DataType::Type::kInt32:
1755 __ movl(dst.AsRegister<Register>(), src);
1756 break;
1757 case DataType::Type::kInt64: {
1758 if (is_atomic_load) {
1759 __ movsd(temp, src);
1760 if (instr != nullptr) {
1761 MaybeRecordImplicitNullCheck(instr);
1762 }
1763 __ movd(dst.AsRegisterPairLow<Register>(), temp);
1764 __ psrlq(temp, Immediate(32));
1765 __ movd(dst.AsRegisterPairHigh<Register>(), temp);
1766 } else {
1767 DCHECK_NE(src.GetBaseRegister(), dst.AsRegisterPairLow<Register>());
1768 Address src_high = Address::displace(src, kX86WordSize);
1769 __ movl(dst.AsRegisterPairLow<Register>(), src);
1770 if (instr != nullptr) {
1771 MaybeRecordImplicitNullCheck(instr);
1772 }
1773 __ movl(dst.AsRegisterPairHigh<Register>(), src_high);
1774 }
1775 break;
1776 }
1777 case DataType::Type::kFloat32:
1778 __ movss(dst.AsFpuRegister<XmmRegister>(), src);
1779 break;
1780 case DataType::Type::kFloat64:
1781 __ movsd(dst.AsFpuRegister<XmmRegister>(), src);
1782 break;
1783 case DataType::Type::kReference:
1784 DCHECK(!EmitReadBarrier());
1785 __ movl(dst.AsRegister<Register>(), src);
1786 __ MaybeUnpoisonHeapReference(dst.AsRegister<Register>());
1787 break;
1788 default:
1789 LOG(FATAL) << "Unreachable type " << dst_type;
1790 }
1791 if (instr != nullptr && dst_type != DataType::Type::kInt64) {
1792 // kInt64 needs special handling that is done in the above switch.
1793 MaybeRecordImplicitNullCheck(instr);
1794 }
1795 }
1796
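// Store a value (or constant) of type `src_type` to memory at
// `dst_base + dst_index * dst_scale + dst_disp`; 64-bit values are written as two 32-bit halves.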
1797 void CodeGeneratorX86::MoveToMemory(DataType::Type src_type,
1798 Location src,
1799 Register dst_base,
1800 Register dst_index,
1801 ScaleFactor dst_scale,
1802 int32_t dst_disp) {
1803 DCHECK(dst_base != Register::kNoRegister);
1804 Address dst = CreateAddress(dst_base, dst_index, dst_scale, dst_disp);
1805
1806 switch (src_type) {
1807 case DataType::Type::kBool:
1808 case DataType::Type::kUint8:
1809 case DataType::Type::kInt8: {
1810 if (src.IsConstant()) {
1811 __ movb(dst, Immediate(CodeGenerator::GetInt8ValueOf(src.GetConstant())));
1812 } else {
1813 __ movb(dst, src.AsRegister<ByteRegister>());
1814 }
1815 break;
1816 }
1817 case DataType::Type::kUint16:
1818 case DataType::Type::kInt16: {
1819 if (src.IsConstant()) {
1820 __ movw(dst, Immediate(CodeGenerator::GetInt16ValueOf(src.GetConstant())));
1821 } else {
1822 __ movw(dst, src.AsRegister<Register>());
1823 }
1824 break;
1825 }
1826 case DataType::Type::kUint32:
1827 case DataType::Type::kInt32: {
1828 if (src.IsConstant()) {
1829 int32_t v = CodeGenerator::GetInt32ValueOf(src.GetConstant());
1830 __ movl(dst, Immediate(v));
1831 } else {
1832 __ movl(dst, src.AsRegister<Register>());
1833 }
1834 break;
1835 }
1836 case DataType::Type::kUint64:
1837 case DataType::Type::kInt64: {
1838 Address dst_next_4_bytes = CreateAddress(dst_base, dst_index, dst_scale, dst_disp + 4);
1839 if (src.IsConstant()) {
1840 int64_t v = CodeGenerator::GetInt64ValueOf(src.GetConstant());
1841 __ movl(dst, Immediate(Low32Bits(v)));
1842 __ movl(dst_next_4_bytes, Immediate(High32Bits(v)));
1843 } else {
1844 __ movl(dst, src.AsRegisterPairLow<Register>());
1845 __ movl(dst_next_4_bytes, src.AsRegisterPairHigh<Register>());
1846 }
1847 break;
1848 }
1849 case DataType::Type::kFloat32: {
1850 if (src.IsConstant()) {
1851 int32_t v = CodeGenerator::GetInt32ValueOf(src.GetConstant());
1852 __ movl(dst, Immediate(v));
1853 } else {
1854 __ movss(dst, src.AsFpuRegister<XmmRegister>());
1855 }
1856 break;
1857 }
1858 case DataType::Type::kFloat64: {
1859 Address dst_next_4_bytes = CreateAddress(dst_base, dst_index, dst_scale, dst_disp + 4);
1860 if (src.IsConstant()) {
1861 int64_t v = CodeGenerator::GetInt64ValueOf(src.GetConstant());
1862 __ movl(dst, Immediate(Low32Bits(v)));
1863 __ movl(dst_next_4_bytes, Immediate(High32Bits(v)));
1864 } else {
1865 __ movsd(dst, src.AsFpuRegister<XmmRegister>());
1866 }
1867 break;
1868 }
1869 case DataType::Type::kVoid:
1870 case DataType::Type::kReference:
1871 LOG(FATAL) << "Unreachable type " << src_type;
1872 }
1873 }
1874
1875 void CodeGeneratorX86::MoveConstant(Location location, int32_t value) {
1876 DCHECK(location.IsRegister());
1877 __ movl(location.AsRegister<Register>(), Immediate(value));
1878 }
1879
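// Resolve a single move through the parallel move resolver, splitting a 64-bit core/stack move
// into two independent 32-bit moves.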
1880 void CodeGeneratorX86::MoveLocation(Location dst, Location src, DataType::Type dst_type) {
1881 HParallelMove move(GetGraph()->GetAllocator());
1882 if (dst_type == DataType::Type::kInt64 && !src.IsConstant() && !src.IsFpuRegister()) {
1883 move.AddMove(src.ToLow(), dst.ToLow(), DataType::Type::kInt32, nullptr);
1884 move.AddMove(src.ToHigh(), dst.ToHigh(), DataType::Type::kInt32, nullptr);
1885 } else {
1886 move.AddMove(src, dst, dst_type, nullptr);
1887 }
1888 GetMoveResolver()->EmitNativeCode(&move);
1889 }
1890
1891 void CodeGeneratorX86::AddLocationAsTemp(Location location, LocationSummary* locations) {
1892 if (location.IsRegister()) {
1893 locations->AddTemp(location);
1894 } else if (location.IsRegisterPair()) {
1895 locations->AddTemp(Location::RegisterLocation(location.AsRegisterPairLow<Register>()));
1896 locations->AddTemp(Location::RegisterLocation(location.AsRegisterPairHigh<Register>()));
1897 } else {
1898 UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
1899 }
1900 }
1901
1902 void InstructionCodeGeneratorX86::HandleGoto(HInstruction* got, HBasicBlock* successor) {
1903 if (successor->IsExitBlock()) {
1904 DCHECK(got->GetPrevious()->AlwaysThrows());
1905 return; // no code needed
1906 }
1907
1908 HBasicBlock* block = got->GetBlock();
1909 HInstruction* previous = got->GetPrevious();
1910
1911 HLoopInformation* info = block->GetLoopInformation();
1912 if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
1913 codegen_->MaybeIncrementHotness(info->GetSuspendCheck(), /* is_frame_entry= */ false);
1914 GenerateSuspendCheck(info->GetSuspendCheck(), successor);
1915 return;
1916 }
1917
1918 if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
1919 GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
1920 }
1921 if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
1922 __ jmp(codegen_->GetLabelOf(successor));
1923 }
1924 }
1925
1926 void LocationsBuilderX86::VisitGoto(HGoto* got) {
1927 got->SetLocations(nullptr);
1928 }
1929
1930 void InstructionCodeGeneratorX86::VisitGoto(HGoto* got) {
1931 HandleGoto(got, got->GetSuccessor());
1932 }
1933
1934 void LocationsBuilderX86::VisitTryBoundary(HTryBoundary* try_boundary) {
1935 try_boundary->SetLocations(nullptr);
1936 }
1937
1938 void InstructionCodeGeneratorX86::VisitTryBoundary(HTryBoundary* try_boundary) {
1939 HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
1940 if (!successor->IsExitBlock()) {
1941 HandleGoto(try_boundary, successor);
1942 }
1943 }
1944
1945 void LocationsBuilderX86::VisitExit(HExit* exit) {
1946 exit->SetLocations(nullptr);
1947 }
1948
1949 void InstructionCodeGeneratorX86::VisitExit([[maybe_unused]] HExit* exit) {}
1950
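// Emit the branches for a floating-point condition whose flags were just set by ucomiss/ucomisd:
// route the unordered (NaN) case to the correct target first, then branch on the unsigned
// condition code.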
1951 template<class LabelType>
1952 void InstructionCodeGeneratorX86::GenerateFPJumps(HCondition* cond,
1953 LabelType* true_label,
1954 LabelType* false_label) {
1955 if (cond->IsFPConditionTrueIfNaN()) {
1956 __ j(kUnordered, true_label);
1957 } else if (cond->IsFPConditionFalseIfNaN()) {
1958 __ j(kUnordered, false_label);
1959 }
1960 __ j(X86UnsignedOrFPCondition(cond->GetCondition()), true_label);
1961 }
1962
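// Branch on a 64-bit comparison by first comparing the high words and only comparing the low
// words (with an unsigned condition) when the high words are equal. For example, for kCondLE we
// branch to `true_label` if high(lhs) < high(rhs), to `false_label` if high(lhs) > high(rhs), and
// otherwise compare the low words and branch to `true_label` on kBelowEqual.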
1963 template<class LabelType>
1964 void InstructionCodeGeneratorX86::GenerateLongComparesAndJumps(HCondition* cond,
1965 LabelType* true_label,
1966 LabelType* false_label) {
1967 LocationSummary* locations = cond->GetLocations();
1968 Location left = locations->InAt(0);
1969 Location right = locations->InAt(1);
1970 IfCondition if_cond = cond->GetCondition();
1971
1972 Register left_high = left.AsRegisterPairHigh<Register>();
1973 Register left_low = left.AsRegisterPairLow<Register>();
1974 IfCondition true_high_cond = if_cond;
1975 IfCondition false_high_cond = cond->GetOppositeCondition();
1976 Condition final_condition = X86UnsignedOrFPCondition(if_cond); // unsigned on lower part
1977
1978 // Set the conditions for the test, remembering that == needs to be
1979 // decided using the low words.
1980 switch (if_cond) {
1981 case kCondEQ:
1982 case kCondNE:
1983 // Nothing to do.
1984 break;
1985 case kCondLT:
1986 false_high_cond = kCondGT;
1987 break;
1988 case kCondLE:
1989 true_high_cond = kCondLT;
1990 break;
1991 case kCondGT:
1992 false_high_cond = kCondLT;
1993 break;
1994 case kCondGE:
1995 true_high_cond = kCondGT;
1996 break;
1997 case kCondB:
1998 false_high_cond = kCondA;
1999 break;
2000 case kCondBE:
2001 true_high_cond = kCondB;
2002 break;
2003 case kCondA:
2004 false_high_cond = kCondB;
2005 break;
2006 case kCondAE:
2007 true_high_cond = kCondA;
2008 break;
2009 }
2010
2011 if (right.IsConstant()) {
2012 int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
2013 int32_t val_high = High32Bits(value);
2014 int32_t val_low = Low32Bits(value);
2015
2016 codegen_->Compare32BitValue(left_high, val_high);
2017 if (if_cond == kCondNE) {
2018 __ j(X86Condition(true_high_cond), true_label);
2019 } else if (if_cond == kCondEQ) {
2020 __ j(X86Condition(false_high_cond), false_label);
2021 } else {
2022 __ j(X86Condition(true_high_cond), true_label);
2023 __ j(X86Condition(false_high_cond), false_label);
2024 }
2025 // Must be equal high, so compare the lows.
2026 codegen_->Compare32BitValue(left_low, val_low);
2027 } else if (right.IsRegisterPair()) {
2028 Register right_high = right.AsRegisterPairHigh<Register>();
2029 Register right_low = right.AsRegisterPairLow<Register>();
2030
2031 __ cmpl(left_high, right_high);
2032 if (if_cond == kCondNE) {
2033 __ j(X86Condition(true_high_cond), true_label);
2034 } else if (if_cond == kCondEQ) {
2035 __ j(X86Condition(false_high_cond), false_label);
2036 } else {
2037 __ j(X86Condition(true_high_cond), true_label);
2038 __ j(X86Condition(false_high_cond), false_label);
2039 }
2040 // Must be equal high, so compare the lows.
2041 __ cmpl(left_low, right_low);
2042 } else {
2043 DCHECK(right.IsDoubleStackSlot());
2044 __ cmpl(left_high, Address(ESP, right.GetHighStackIndex(kX86WordSize)));
2045 if (if_cond == kCondNE) {
2046 __ j(X86Condition(true_high_cond), true_label);
2047 } else if (if_cond == kCondEQ) {
2048 __ j(X86Condition(false_high_cond), false_label);
2049 } else {
2050 __ j(X86Condition(true_high_cond), true_label);
2051 __ j(X86Condition(false_high_cond), false_label);
2052 }
2053 // Must be equal high, so compare the lows.
2054 __ cmpl(left_low, Address(ESP, right.GetStackIndex()));
2055 }
2056 // The last comparison might be unsigned.
2057 __ j(final_condition, true_label);
2058 }
2059
2060 void InstructionCodeGeneratorX86::GenerateFPCompare(Location lhs,
2061 Location rhs,
2062 HInstruction* insn,
2063 bool is_double) {
2064 HX86LoadFromConstantTable* const_area = insn->InputAt(1)->AsX86LoadFromConstantTableOrNull();
2065 if (is_double) {
2066 if (rhs.IsFpuRegister()) {
2067 __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>());
2068 } else if (const_area != nullptr) {
2069 DCHECK(const_area->IsEmittedAtUseSite());
2070 __ ucomisd(lhs.AsFpuRegister<XmmRegister>(),
2071 codegen_->LiteralDoubleAddress(
2072 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
2073 const_area->GetBaseMethodAddress(),
2074 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
2075 } else {
2076 DCHECK(rhs.IsDoubleStackSlot());
2077 __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), Address(ESP, rhs.GetStackIndex()));
2078 }
2079 } else {
2080 if (rhs.IsFpuRegister()) {
2081 __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>());
2082 } else if (const_area != nullptr) {
2083 DCHECK(const_area->IsEmittedAtUseSite());
2084 __ ucomiss(lhs.AsFpuRegister<XmmRegister>(),
2085 codegen_->LiteralFloatAddress(
2086 const_area->GetConstant()->AsFloatConstant()->GetValue(),
2087 const_area->GetBaseMethodAddress(),
2088 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
2089 } else {
2090 DCHECK(rhs.IsStackSlot());
2091 __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), Address(ESP, rhs.GetStackIndex()));
2092 }
2093 }
2094 }
2095
2096 template<class LabelType>
2097 void InstructionCodeGeneratorX86::GenerateCompareTestAndBranch(HCondition* condition,
2098 LabelType* true_target_in,
2099 LabelType* false_target_in) {
2100   // Generated branching requires both targets to be explicit. If either of the
2101   // targets is nullptr (fallthrough), use and bind `fallthrough_target` instead.
2102 LabelType fallthrough_target;
2103 LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
2104 LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
2105
2106 LocationSummary* locations = condition->GetLocations();
2107 Location left = locations->InAt(0);
2108 Location right = locations->InAt(1);
2109
2110 DataType::Type type = condition->InputAt(0)->GetType();
2111 switch (type) {
2112 case DataType::Type::kInt64:
2113 GenerateLongComparesAndJumps(condition, true_target, false_target);
2114 break;
2115 case DataType::Type::kFloat32:
2116 GenerateFPCompare(left, right, condition, false);
2117 GenerateFPJumps(condition, true_target, false_target);
2118 break;
2119 case DataType::Type::kFloat64:
2120 GenerateFPCompare(left, right, condition, true);
2121 GenerateFPJumps(condition, true_target, false_target);
2122 break;
2123 default:
2124 LOG(FATAL) << "Unexpected compare type " << type;
2125 }
2126
2127 if (false_target != &fallthrough_target) {
2128 __ jmp(false_target);
2129 }
2130
2131 if (fallthrough_target.IsLinked()) {
2132 __ Bind(&fallthrough_target);
2133 }
2134 }
2135
2136 static bool AreEflagsSetFrom(HInstruction* cond,
2137 HInstruction* branch,
2138 const CompilerOptions& compiler_options) {
2139   // Moves may affect the eflags register (moving zero uses xorl), so the eflags are only
2140   // reliable if the condition is emitted strictly before `branch`. We also can't use the eflags
2141   // for long/FP conditions if they are materialized, due to the complex branching.
2142 return cond->IsCondition() &&
2143 cond->GetNext() == branch &&
2144 cond->InputAt(0)->GetType() != DataType::Type::kInt64 &&
2145 !DataType::IsFloatingPointType(cond->InputAt(0)->GetType()) &&
2146 !(cond->GetBlock()->GetGraph()->IsCompilingBaseline() &&
2147 compiler_options.ProfileBranches());
2148 }
2149
2150 template<class LabelType>
2151 void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instruction,
2152 size_t condition_input_index,
2153 LabelType* true_target,
2154 LabelType* false_target) {
2155 HInstruction* cond = instruction->InputAt(condition_input_index);
2156
2157 if (true_target == nullptr && false_target == nullptr) {
2158 // Nothing to do. The code always falls through.
2159 return;
2160 } else if (cond->IsIntConstant()) {
2161 // Constant condition, statically compared against "true" (integer value 1).
2162 if (cond->AsIntConstant()->IsTrue()) {
2163 if (true_target != nullptr) {
2164 __ jmp(true_target);
2165 }
2166 } else {
2167 DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
2168 if (false_target != nullptr) {
2169 __ jmp(false_target);
2170 }
2171 }
2172 return;
2173 }
2174
2175 // The following code generates these patterns:
2176 // (1) true_target == nullptr && false_target != nullptr
2177 // - opposite condition true => branch to false_target
2178 // (2) true_target != nullptr && false_target == nullptr
2179 // - condition true => branch to true_target
2180 // (3) true_target != nullptr && false_target != nullptr
2181 // - condition true => branch to true_target
2182 // - branch to false_target
2183 if (IsBooleanValueOrMaterializedCondition(cond)) {
2184 if (AreEflagsSetFrom(cond, instruction, codegen_->GetCompilerOptions())) {
2185 if (true_target == nullptr) {
2186 __ j(X86Condition(cond->AsCondition()->GetOppositeCondition()), false_target);
2187 } else {
2188 __ j(X86Condition(cond->AsCondition()->GetCondition()), true_target);
2189 }
2190 } else {
2191 // Materialized condition, compare against 0.
2192 Location lhs = instruction->GetLocations()->InAt(condition_input_index);
2193 if (lhs.IsRegister()) {
2194 __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>());
2195 } else {
2196 __ cmpl(Address(ESP, lhs.GetStackIndex()), Immediate(0));
2197 }
2198 if (true_target == nullptr) {
2199 __ j(kEqual, false_target);
2200 } else {
2201 __ j(kNotEqual, true_target);
2202 }
2203 }
2204 } else {
2205 // Condition has not been materialized, use its inputs as the comparison and
2206 // its condition as the branch condition.
2207 HCondition* condition = cond->AsCondition();
2208
2209 // If this is a long or FP comparison that has been folded into
2210 // the HCondition, generate the comparison directly.
2211 DataType::Type type = condition->InputAt(0)->GetType();
2212 if (type == DataType::Type::kInt64 || DataType::IsFloatingPointType(type)) {
2213 GenerateCompareTestAndBranch(condition, true_target, false_target);
2214 return;
2215 }
2216
2217 Location lhs = condition->GetLocations()->InAt(0);
2218 Location rhs = condition->GetLocations()->InAt(1);
2219 // LHS is guaranteed to be in a register (see LocationsBuilderX86::HandleCondition).
2220 codegen_->GenerateIntCompare(lhs, rhs);
2221 if (true_target == nullptr) {
2222 __ j(X86Condition(condition->GetOppositeCondition()), false_target);
2223 } else {
2224 __ j(X86Condition(condition->GetCondition()), true_target);
2225 }
2226 }
2227
2228 // If neither branch falls through (case 3), the conditional branch to `true_target`
2229 // was already emitted (case 2) and we need to emit a jump to `false_target`.
2230 if (true_target != nullptr && false_target != nullptr) {
2231 __ jmp(false_target);
2232 }
2233 }
2234
2235 void LocationsBuilderX86::VisitIf(HIf* if_instr) {
2236 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
2237 if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
2238 if (GetGraph()->IsCompilingBaseline() &&
2239 codegen_->GetCompilerOptions().ProfileBranches() &&
2240 !Runtime::Current()->IsAotCompiler()) {
2241 locations->SetInAt(0, Location::RequiresRegister());
2242 locations->AddTemp(Location::RequiresRegister());
2243 locations->AddTemp(Location::RequiresRegister());
2244 } else {
2245 locations->SetInAt(0, Location::Any());
2246 }
2247 }
2248 }
2249
2250 void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) {
2251 HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
2252 HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
2253 Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
2254 nullptr : codegen_->GetLabelOf(true_successor);
2255 Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
2256 nullptr : codegen_->GetLabelOf(false_successor);
2257 if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
2258 if (GetGraph()->IsCompilingBaseline() &&
2259 codegen_->GetCompilerOptions().ProfileBranches() &&
2260 !Runtime::Current()->IsAotCompiler()) {
2261 DCHECK(if_instr->InputAt(0)->IsCondition());
2262 Register temp = if_instr->GetLocations()->GetTemp(0).AsRegister<Register>();
2263 Register counter = if_instr->GetLocations()->GetTemp(1).AsRegister<Register>();
2264 ProfilingInfo* info = GetGraph()->GetProfilingInfo();
2265 DCHECK(info != nullptr);
2266 BranchCache* cache = info->GetBranchCache(if_instr->GetDexPc());
2267 // Currently, not all If branches are profiled.
2268 if (cache != nullptr) {
2269 uint64_t address =
2270 reinterpret_cast64<uint64_t>(cache) + BranchCache::FalseOffset().Int32Value();
2271 static_assert(
2272 BranchCache::TrueOffset().Int32Value() - BranchCache::FalseOffset().Int32Value() == 2,
2273 "Unexpected offsets for BranchCache");
2274 NearLabel done;
2275 Location lhs = if_instr->GetLocations()->InAt(0);
2276 __ movl(temp, Immediate(address));
2277 __ movzxw(counter, Address(temp, lhs.AsRegister<Register>(), TIMES_2, 0));
2278 __ addw(counter, Immediate(1));
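        // If the 16-bit counter wrapped around to zero, skip the store so the value saturates at 0xFFFF.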
2279 __ j(kEqual, &done);
2280 __ movw(Address(temp, lhs.AsRegister<Register>(), TIMES_2, 0), counter);
2281 __ Bind(&done);
2282 }
2283 }
2284 }
2285 GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
2286 }
2287
2288 void LocationsBuilderX86::VisitDeoptimize(HDeoptimize* deoptimize) {
2289 LocationSummary* locations = new (GetGraph()->GetAllocator())
2290 LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
2291 InvokeRuntimeCallingConvention calling_convention;
2292 RegisterSet caller_saves = RegisterSet::Empty();
2293 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
2294 locations->SetCustomSlowPathCallerSaves(caller_saves);
2295 if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
2296 locations->SetInAt(0, Location::Any());
2297 }
2298 }
2299
2300 void InstructionCodeGeneratorX86::VisitDeoptimize(HDeoptimize* deoptimize) {
2301 SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86>(deoptimize);
2302 GenerateTestAndBranch<Label>(deoptimize,
2303 /* condition_input_index= */ 0,
2304 slow_path->GetEntryLabel(),
2305 /* false_target= */ nullptr);
2306 }
2307
2308 void LocationsBuilderX86::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
2309 LocationSummary* locations = new (GetGraph()->GetAllocator())
2310 LocationSummary(flag, LocationSummary::kNoCall);
2311 locations->SetOut(Location::RequiresRegister());
2312 }
2313
2314 void InstructionCodeGeneratorX86::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
2315 __ movl(flag->GetLocations()->Out().AsRegister<Register>(),
2316 Address(ESP, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
2317 }
2318
2319 static bool SelectCanUseCMOV(HSelect* select) {
2320 // There are no conditional move instructions for XMMs.
2321 if (DataType::IsFloatingPointType(select->GetType())) {
2322 return false;
2323 }
2324
2325   // An FP condition doesn't generate the single CC that we need.
2326   // In 32-bit mode, a long condition doesn't generate a single CC either.
2327 HInstruction* condition = select->GetCondition();
2328 if (condition->IsCondition()) {
2329 DataType::Type compare_type = condition->InputAt(0)->GetType();
2330 if (compare_type == DataType::Type::kInt64 ||
2331 DataType::IsFloatingPointType(compare_type)) {
2332 return false;
2333 }
2334 }
2335
2336 // We can generate a CMOV for this Select.
2337 return true;
2338 }
2339
2340 void LocationsBuilderX86::VisitSelect(HSelect* select) {
2341 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select);
2342 if (DataType::IsFloatingPointType(select->GetType())) {
2343 locations->SetInAt(0, Location::RequiresFpuRegister());
2344 locations->SetInAt(1, Location::Any());
2345 } else {
2346 locations->SetInAt(0, Location::RequiresRegister());
2347 if (SelectCanUseCMOV(select)) {
2348 if (select->InputAt(1)->IsConstant()) {
2349 // Cmov can't handle a constant value.
2350 locations->SetInAt(1, Location::RequiresRegister());
2351 } else {
2352 locations->SetInAt(1, Location::Any());
2353 }
2354 } else {
2355 locations->SetInAt(1, Location::Any());
2356 }
2357 }
2358 if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
2359 locations->SetInAt(2, Location::RequiresRegister());
2360 }
2361 locations->SetOut(Location::SameAsFirstInput());
2362 }
2363
2364 void InstructionCodeGeneratorX86::VisitSelect(HSelect* select) {
2365 LocationSummary* locations = select->GetLocations();
2366 DCHECK(locations->InAt(0).Equals(locations->Out()));
2367 if (SelectCanUseCMOV(select)) {
2368 // If both the condition and the source types are integer, we can generate
2369 // a CMOV to implement Select.
2370
2371 HInstruction* select_condition = select->GetCondition();
2372 Condition cond = kNotEqual;
2373
2374 // Figure out how to test the 'condition'.
2375 if (select_condition->IsCondition()) {
2376 HCondition* condition = select_condition->AsCondition();
2377 if (!condition->IsEmittedAtUseSite()) {
2378 // This was a previously materialized condition.
2379 // Can we use the existing condition code?
2380 if (AreEflagsSetFrom(condition, select, codegen_->GetCompilerOptions())) {
2381 // Materialization was the previous instruction. Condition codes are right.
2382 cond = X86Condition(condition->GetCondition());
2383 } else {
2384 // No, we have to recreate the condition code.
2385 Register cond_reg = locations->InAt(2).AsRegister<Register>();
2386 __ testl(cond_reg, cond_reg);
2387 }
2388 } else {
2389 // We can't handle FP or long here.
2390 DCHECK_NE(condition->InputAt(0)->GetType(), DataType::Type::kInt64);
2391 DCHECK(!DataType::IsFloatingPointType(condition->InputAt(0)->GetType()));
2392 LocationSummary* cond_locations = condition->GetLocations();
2393 codegen_->GenerateIntCompare(cond_locations->InAt(0), cond_locations->InAt(1));
2394 cond = X86Condition(condition->GetCondition());
2395 }
2396 } else {
2397 // Must be a Boolean condition, which needs to be compared to 0.
2398 Register cond_reg = locations->InAt(2).AsRegister<Register>();
2399 __ testl(cond_reg, cond_reg);
2400 }
2401
2402 // If the condition is true, overwrite the output, which already contains false.
2403 Location false_loc = locations->InAt(0);
2404 Location true_loc = locations->InAt(1);
2405 if (select->GetType() == DataType::Type::kInt64) {
2406 // 64 bit conditional move.
2407 Register false_high = false_loc.AsRegisterPairHigh<Register>();
2408 Register false_low = false_loc.AsRegisterPairLow<Register>();
2409 if (true_loc.IsRegisterPair()) {
2410 __ cmovl(cond, false_high, true_loc.AsRegisterPairHigh<Register>());
2411 __ cmovl(cond, false_low, true_loc.AsRegisterPairLow<Register>());
2412 } else {
2413 __ cmovl(cond, false_high, Address(ESP, true_loc.GetHighStackIndex(kX86WordSize)));
2414 __ cmovl(cond, false_low, Address(ESP, true_loc.GetStackIndex()));
2415 }
2416 } else {
2417 // 32 bit conditional move.
2418 Register false_reg = false_loc.AsRegister<Register>();
2419 if (true_loc.IsRegister()) {
2420 __ cmovl(cond, false_reg, true_loc.AsRegister<Register>());
2421 } else {
2422 __ cmovl(cond, false_reg, Address(ESP, true_loc.GetStackIndex()));
2423 }
2424 }
2425 } else {
2426 NearLabel false_target;
2427 GenerateTestAndBranch<NearLabel>(
2428 select, /* condition_input_index= */ 2, /* true_target= */ nullptr, &false_target);
2429 codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
2430 __ Bind(&false_target);
2431 }
2432 }
2433
2434 void LocationsBuilderX86::VisitNop(HNop* nop) {
2435 new (GetGraph()->GetAllocator()) LocationSummary(nop);
2436 }
2437
2438 void InstructionCodeGeneratorX86::VisitNop(HNop*) {
2439 // The environment recording already happened in CodeGenerator::Compile.
2440 }
2441
2442 void CodeGeneratorX86::IncreaseFrame(size_t adjustment) {
2443 __ subl(ESP, Immediate(adjustment));
2444 __ cfi().AdjustCFAOffset(adjustment);
2445 }
2446
2447 void CodeGeneratorX86::DecreaseFrame(size_t adjustment) {
2448 __ addl(ESP, Immediate(adjustment));
2449 __ cfi().AdjustCFAOffset(-adjustment);
2450 }
2451
2452 void CodeGeneratorX86::GenerateNop() {
2453 __ nop();
2454 }
2455
2456 void LocationsBuilderX86::HandleCondition(HCondition* cond) {
2457 LocationSummary* locations =
2458 new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall);
2459 // Handle the long/FP comparisons made in instruction simplification.
2460 switch (cond->InputAt(0)->GetType()) {
2461 case DataType::Type::kInt64: {
2462 locations->SetInAt(0, Location::RequiresRegister());
2463 locations->SetInAt(1, Location::Any());
2464 if (!cond->IsEmittedAtUseSite()) {
2465 locations->SetOut(Location::RequiresRegister());
2466 }
2467 break;
2468 }
2469 case DataType::Type::kFloat32:
2470 case DataType::Type::kFloat64: {
2471 locations->SetInAt(0, Location::RequiresFpuRegister());
2472 if (cond->InputAt(1)->IsX86LoadFromConstantTable()) {
2473 DCHECK(cond->InputAt(1)->IsEmittedAtUseSite());
2474 } else if (cond->InputAt(1)->IsConstant()) {
2475 locations->SetInAt(1, Location::RequiresFpuRegister());
2476 } else {
2477 locations->SetInAt(1, Location::Any());
2478 }
2479 if (!cond->IsEmittedAtUseSite()) {
2480 locations->SetOut(Location::RequiresRegister());
2481 }
2482 break;
2483 }
2484 default:
2485 locations->SetInAt(0, Location::RequiresRegister());
2486 locations->SetInAt(1, Location::Any());
2487 if (!cond->IsEmittedAtUseSite()) {
2488 // We need a byte register.
2489 locations->SetOut(Location::RegisterLocation(ECX));
2490 }
2491 break;
2492 }
2493 }
2494
2495 void InstructionCodeGeneratorX86::HandleCondition(HCondition* cond) {
2496 if (cond->IsEmittedAtUseSite()) {
2497 return;
2498 }
2499
2500 LocationSummary* locations = cond->GetLocations();
2501 Location lhs = locations->InAt(0);
2502 Location rhs = locations->InAt(1);
2503 Register reg = locations->Out().AsRegister<Register>();
2504 NearLabel true_label, false_label;
2505
2506 switch (cond->InputAt(0)->GetType()) {
2507 default: {
2508 // Integer case.
2509
2510 // Clear output register: setb only sets the low byte.
2511 __ xorl(reg, reg);
2512 codegen_->GenerateIntCompare(lhs, rhs);
2513 __ setb(X86Condition(cond->GetCondition()), reg);
2514 return;
2515 }
2516 case DataType::Type::kInt64:
2517 GenerateLongComparesAndJumps(cond, &true_label, &false_label);
2518 break;
2519 case DataType::Type::kFloat32:
2520 GenerateFPCompare(lhs, rhs, cond, false);
2521 GenerateFPJumps(cond, &true_label, &false_label);
2522 break;
2523 case DataType::Type::kFloat64:
2524 GenerateFPCompare(lhs, rhs, cond, true);
2525 GenerateFPJumps(cond, &true_label, &false_label);
2526 break;
2527 }
2528
2529 // Convert the jumps into the result.
2530 NearLabel done_label;
2531
2532 // False case: result = 0.
2533 __ Bind(&false_label);
2534 __ xorl(reg, reg);
2535 __ jmp(&done_label);
2536
2537 // True case: result = 1.
2538 __ Bind(&true_label);
2539 __ movl(reg, Immediate(1));
2540 __ Bind(&done_label);
2541 }
2542
2543 void LocationsBuilderX86::VisitEqual(HEqual* comp) {
2544 HandleCondition(comp);
2545 }
2546
2547 void InstructionCodeGeneratorX86::VisitEqual(HEqual* comp) {
2548 HandleCondition(comp);
2549 }
2550
2551 void LocationsBuilderX86::VisitNotEqual(HNotEqual* comp) {
2552 HandleCondition(comp);
2553 }
2554
2555 void InstructionCodeGeneratorX86::VisitNotEqual(HNotEqual* comp) {
2556 HandleCondition(comp);
2557 }
2558
2559 void LocationsBuilderX86::VisitLessThan(HLessThan* comp) {
2560 HandleCondition(comp);
2561 }
2562
2563 void InstructionCodeGeneratorX86::VisitLessThan(HLessThan* comp) {
2564 HandleCondition(comp);
2565 }
2566
2567 void LocationsBuilderX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2568 HandleCondition(comp);
2569 }
2570
2571 void InstructionCodeGeneratorX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2572 HandleCondition(comp);
2573 }
2574
2575 void LocationsBuilderX86::VisitGreaterThan(HGreaterThan* comp) {
2576 HandleCondition(comp);
2577 }
2578
2579 void InstructionCodeGeneratorX86::VisitGreaterThan(HGreaterThan* comp) {
2580 HandleCondition(comp);
2581 }
2582
2583 void LocationsBuilderX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2584 HandleCondition(comp);
2585 }
2586
2587 void InstructionCodeGeneratorX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2588 HandleCondition(comp);
2589 }
2590
2591 void LocationsBuilderX86::VisitBelow(HBelow* comp) {
2592 HandleCondition(comp);
2593 }
2594
2595 void InstructionCodeGeneratorX86::VisitBelow(HBelow* comp) {
2596 HandleCondition(comp);
2597 }
2598
2599 void LocationsBuilderX86::VisitBelowOrEqual(HBelowOrEqual* comp) {
2600 HandleCondition(comp);
2601 }
2602
2603 void InstructionCodeGeneratorX86::VisitBelowOrEqual(HBelowOrEqual* comp) {
2604 HandleCondition(comp);
2605 }
2606
2607 void LocationsBuilderX86::VisitAbove(HAbove* comp) {
2608 HandleCondition(comp);
2609 }
2610
2611 void InstructionCodeGeneratorX86::VisitAbove(HAbove* comp) {
2612 HandleCondition(comp);
2613 }
2614
2615 void LocationsBuilderX86::VisitAboveOrEqual(HAboveOrEqual* comp) {
2616 HandleCondition(comp);
2617 }
2618
2619 void InstructionCodeGeneratorX86::VisitAboveOrEqual(HAboveOrEqual* comp) {
2620 HandleCondition(comp);
2621 }
2622
2623 void LocationsBuilderX86::VisitIntConstant(HIntConstant* constant) {
2624 LocationSummary* locations =
2625 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2626 locations->SetOut(Location::ConstantLocation(constant));
2627 }
2628
2629 void InstructionCodeGeneratorX86::VisitIntConstant([[maybe_unused]] HIntConstant* constant) {
2630 // Will be generated at use site.
2631 }
2632
2633 void LocationsBuilderX86::VisitNullConstant(HNullConstant* constant) {
2634 LocationSummary* locations =
2635 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2636 locations->SetOut(Location::ConstantLocation(constant));
2637 }
2638
2639 void InstructionCodeGeneratorX86::VisitNullConstant([[maybe_unused]] HNullConstant* constant) {
2640 // Will be generated at use site.
2641 }
2642
2643 void LocationsBuilderX86::VisitLongConstant(HLongConstant* constant) {
2644 LocationSummary* locations =
2645 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2646 locations->SetOut(Location::ConstantLocation(constant));
2647 }
2648
2649 void InstructionCodeGeneratorX86::VisitLongConstant([[maybe_unused]] HLongConstant* constant) {
2650 // Will be generated at use site.
2651 }
2652
2653 void LocationsBuilderX86::VisitFloatConstant(HFloatConstant* constant) {
2654 LocationSummary* locations =
2655 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2656 locations->SetOut(Location::ConstantLocation(constant));
2657 }
2658
2659 void InstructionCodeGeneratorX86::VisitFloatConstant([[maybe_unused]] HFloatConstant* constant) {
2660 // Will be generated at use site.
2661 }
2662
2663 void LocationsBuilderX86::VisitDoubleConstant(HDoubleConstant* constant) {
2664 LocationSummary* locations =
2665 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2666 locations->SetOut(Location::ConstantLocation(constant));
2667 }
2668
2669 void InstructionCodeGeneratorX86::VisitDoubleConstant([[maybe_unused]] HDoubleConstant* constant) {
2670 // Will be generated at use site.
2671 }
2672
2673 void LocationsBuilderX86::VisitConstructorFence(HConstructorFence* constructor_fence) {
2674 constructor_fence->SetLocations(nullptr);
2675 }
2676
2677 void InstructionCodeGeneratorX86::VisitConstructorFence(
2678 [[maybe_unused]] HConstructorFence* constructor_fence) {
2679 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2680 }
2681
2682 void LocationsBuilderX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2683 memory_barrier->SetLocations(nullptr);
2684 }
2685
2686 void InstructionCodeGeneratorX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2687 codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
2688 }
2689
2690 void LocationsBuilderX86::VisitReturnVoid(HReturnVoid* ret) {
2691 ret->SetLocations(nullptr);
2692 }
2693
2694 void InstructionCodeGeneratorX86::VisitReturnVoid([[maybe_unused]] HReturnVoid* ret) {
2695 codegen_->GenerateFrameExit();
2696 }
2697
2698 void LocationsBuilderX86::VisitReturn(HReturn* ret) {
2699 LocationSummary* locations =
2700 new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall);
2701 SetInForReturnValue(ret, locations);
2702 }
2703
2704 void InstructionCodeGeneratorX86::VisitReturn(HReturn* ret) {
2705 switch (ret->InputAt(0)->GetType()) {
2706 case DataType::Type::kReference:
2707 case DataType::Type::kBool:
2708 case DataType::Type::kUint8:
2709 case DataType::Type::kInt8:
2710 case DataType::Type::kUint16:
2711 case DataType::Type::kInt16:
2712 case DataType::Type::kInt32:
2713 DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<Register>(), EAX);
2714 break;
2715
2716 case DataType::Type::kInt64:
2717 DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegisterPairLow<Register>(), EAX);
2718 DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegisterPairHigh<Register>(), EDX);
2719 break;
2720
2721 case DataType::Type::kFloat32:
2722 DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>(), XMM0);
2723 if (GetGraph()->IsCompilingOsr()) {
2724 // To simplify callers of an OSR method, we put the return value in both
2725 // floating point and core registers.
2726 __ movd(EAX, XMM0);
2727 }
2728 break;
2729
2730 case DataType::Type::kFloat64:
2731 DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>(), XMM0);
2732 if (GetGraph()->IsCompilingOsr()) {
2733 // To simplify callers of an OSR method, we put the return value in both
2734 // floating point and core registers.
2735 __ movd(EAX, XMM0);
2736 // Use XMM1 as temporary register to not clobber XMM0.
2737 __ movaps(XMM1, XMM0);
2738 __ psrlq(XMM1, Immediate(32));
2739 __ movd(EDX, XMM1);
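        // Net effect (illustrative): EAX receives bits [31:0] of the double in XMM0
        // and EDX receives bits [63:32], matching the EDX:EAX long return convention.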
2740 }
2741 break;
2742
2743 default:
2744 LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType();
2745 }
2746 codegen_->GenerateFrameExit();
2747 }
2748
2749 void LocationsBuilderX86::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2750 // The trampoline uses the same calling convention as the dex calling
2751 // convention, except that instead of loading arg0/r0 with the target Method*,
2752 // arg0/r0 will contain the method_idx.
2753 HandleInvoke(invoke);
2754 }
2755
2756 void InstructionCodeGeneratorX86::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2757 codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
2758 }
2759
2760 void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2761 // Explicit clinit checks triggered by static invokes must have been pruned by
2762 // art::PrepareForRegisterAllocation.
2763 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2764
2765 IntrinsicLocationsBuilderX86 intrinsic(codegen_);
2766 if (intrinsic.TryDispatch(invoke)) {
2767 if (invoke->GetLocations()->CanCall() &&
2768 invoke->HasPcRelativeMethodLoadKind() &&
2769 invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).IsInvalid()) {
2770 invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any());
2771 }
2772 return;
2773 }
2774
2775 if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
2776 CriticalNativeCallingConventionVisitorX86 calling_convention_visitor(
2777 /*for_register_allocation=*/ true);
2778 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
2779 } else {
2780 HandleInvoke(invoke);
2781 }
2782
2783 // For PC-relative load kinds the invoke has an extra input, the PC-relative address base.
2784 if (invoke->HasPcRelativeMethodLoadKind()) {
2785 invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
2786 }
2787 }
2788
2789 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86* codegen) {
2790 if (invoke->GetLocations()->Intrinsified()) {
2791 IntrinsicCodeGeneratorX86 intrinsic(codegen);
2792 intrinsic.Dispatch(invoke);
2793 return true;
2794 }
2795 return false;
2796 }
2797
2798 void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2799 // Explicit clinit checks triggered by static invokes must have been pruned by
2800 // art::PrepareForRegisterAllocation.
2801 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2802
2803 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2804 return;
2805 }
2806
2807 LocationSummary* locations = invoke->GetLocations();
2808 codegen_->GenerateStaticOrDirectCall(
2809 invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
2810 }
2811
2812 void LocationsBuilderX86::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2813 IntrinsicLocationsBuilderX86 intrinsic(codegen_);
2814 if (intrinsic.TryDispatch(invoke)) {
2815 return;
2816 }
2817
2818 HandleInvoke(invoke);
2819
2820 if (ProfilingInfoBuilder::IsInlineCacheUseful(invoke, codegen_)) {
2821 // Add one temporary for inline cache update.
2822 invoke->GetLocations()->AddTemp(Location::RegisterLocation(EBP));
2823 }
2824 }
2825
2826 void LocationsBuilderX86::HandleInvoke(HInvoke* invoke) {
2827 InvokeDexCallingConventionVisitorX86 calling_convention_visitor;
2828 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
2829 }
2830
2831 void InstructionCodeGeneratorX86::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2832 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2833 return;
2834 }
2835
2836 codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
2837 DCHECK(!codegen_->IsLeafMethod());
2838 }
2839
2840 void LocationsBuilderX86::VisitInvokeInterface(HInvokeInterface* invoke) {
2841 // This call to HandleInvoke allocates a temporary (core) register
2842 // which is also used to stage the hidden argument before it is moved
2843 // into the FP register (XMM7) that carries it.
2844 HandleInvoke(invoke);
2845 // Add the hidden argument.
2846 invoke->GetLocations()->AddTemp(Location::FpuRegisterLocation(XMM7));
2847
2848 if (ProfilingInfoBuilder::IsInlineCacheUseful(invoke, codegen_)) {
2849 // Add one temporary for inline cache update.
2850 invoke->GetLocations()->AddTemp(Location::RegisterLocation(EBP));
2851 }
2852
2853 // For PC-relative load kinds the invoke has an extra input, the PC-relative address base.
2854 if (IsPcRelativeMethodLoadKind(invoke->GetHiddenArgumentLoadKind())) {
2855 invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
2856 }
2857
2858 if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
2859 invoke->GetLocations()->SetInAt(invoke->GetNumberOfArguments() - 1,
2860 Location::RequiresRegister());
2861 }
2862 }
2863
2864 void CodeGeneratorX86::MaybeGenerateInlineCacheCheck(HInstruction* instruction, Register klass) {
2865 DCHECK_EQ(EAX, klass);
2866 if (ProfilingInfoBuilder::IsInlineCacheUseful(instruction->AsInvoke(), this)) {
2867 ProfilingInfo* info = GetGraph()->GetProfilingInfo();
2868 DCHECK(info != nullptr);
2869 InlineCache* cache = ProfilingInfoBuilder::GetInlineCache(
2870 info, GetCompilerOptions(), instruction->AsInvoke());
2871 if (cache != nullptr) {
2872 uint32_t address = reinterpret_cast32<uint32_t>(cache);
2873 if (kIsDebugBuild) {
2874 uint32_t temp_index = instruction->GetLocations()->GetTempCount() - 1u;
2875 CHECK_EQ(EBP, instruction->GetLocations()->GetTemp(temp_index).AsRegister<Register>());
2876 }
2877 Register temp = EBP;
2878 NearLabel done;
2879 __ movl(temp, Immediate(address));
2880 // Fast path for a monomorphic cache.
2881 __ cmpl(klass, Address(temp, InlineCache::ClassesOffset().Int32Value()));
2882 __ j(kEqual, &done);
2883 GenerateInvokeRuntime(GetThreadOffset<kX86PointerSize>(kQuickUpdateInlineCache).Int32Value());
2884 __ Bind(&done);
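      // Note (illustrative summary): only a monomorphic hit is handled inline;
      // any other receiver class falls through to kQuickUpdateInlineCache so the
      // runtime can record it.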
2885 } else {
2886 // This is unexpected, but we don't guarantee stable compilation across
2887 // JIT runs, so we just warn about it.
2888 ScopedObjectAccess soa(Thread::Current());
2889 LOG(WARNING) << "Missing inline cache for " << GetGraph()->GetArtMethod()->PrettyMethod();
2890 }
2891 }
2892 }
2893
2894 void InstructionCodeGeneratorX86::VisitInvokeInterface(HInvokeInterface* invoke) {
2895 // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
2896 LocationSummary* locations = invoke->GetLocations();
2897 Register temp = locations->GetTemp(0).AsRegister<Register>();
2898 XmmRegister hidden_reg = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
2899 Location receiver = locations->InAt(0);
2900 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2901
2902 // Set the hidden argument. It is safe to do this here, as XMM7
2903 // won't be modified thereafter, before the `call` instruction.
2904 DCHECK_EQ(XMM7, hidden_reg);
2905 if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
2906 __ movd(hidden_reg, locations->InAt(invoke->GetNumberOfArguments() - 1).AsRegister<Register>());
2907 } else if (invoke->GetHiddenArgumentLoadKind() != MethodLoadKind::kRuntimeCall) {
2908 codegen_->LoadMethod(invoke->GetHiddenArgumentLoadKind(), locations->GetTemp(0), invoke);
2909 __ movd(hidden_reg, temp);
2910 }
2911
2912 if (receiver.IsStackSlot()) {
2913 __ movl(temp, Address(ESP, receiver.GetStackIndex()));
2914 // /* HeapReference<Class> */ temp = temp->klass_
2915 __ movl(temp, Address(temp, class_offset));
2916 } else {
2917 // /* HeapReference<Class> */ temp = receiver->klass_
2918 __ movl(temp, Address(receiver.AsRegister<Register>(), class_offset));
2919 }
2920 codegen_->MaybeRecordImplicitNullCheck(invoke);
2921 // Instead of simply (possibly) unpoisoning `temp` here, we should
2922 // emit a read barrier for the previous class reference load.
2923 // However, this is not required in practice, as this is an
2924 // intermediate/temporary reference and because the current
2925 // concurrent copying collector keeps the from-space memory
2926 // intact/accessible until the end of the marking phase (a future
2927 // concurrent copying collector may not).
2928 __ MaybeUnpoisonHeapReference(temp);
2929
2930 codegen_->MaybeGenerateInlineCacheCheck(invoke, temp);
2931
2932 // temp = temp->GetAddressOfIMT()
2933 __ movl(temp,
2934 Address(temp, mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
2935 // temp = temp->GetImtEntryAt(method_offset);
2936 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
2937 invoke->GetImtIndex(), kX86PointerSize));
2938 __ movl(temp, Address(temp, method_offset));
2939 if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRuntimeCall) {
2940 // We pass the method from the IMT in case of a conflict. This will ensure
2941 // we go into the runtime to resolve the actual method.
2942 __ movd(hidden_reg, temp);
2943 }
2944 // call temp->GetEntryPoint();
2945 __ call(Address(temp,
2946 ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value()));
2947
2948 DCHECK(!codegen_->IsLeafMethod());
2949 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2950 }
2951
2952 void LocationsBuilderX86::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2953 IntrinsicLocationsBuilderX86 intrinsic(codegen_);
2954 if (intrinsic.TryDispatch(invoke)) {
2955 return;
2956 }
2957 HandleInvoke(invoke);
2958 }
2959
2960 void InstructionCodeGeneratorX86::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2961 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2962 return;
2963 }
2964 codegen_->GenerateInvokePolymorphicCall(invoke);
2965 }
2966
2967 void LocationsBuilderX86::VisitInvokeCustom(HInvokeCustom* invoke) {
2968 HandleInvoke(invoke);
2969 }
2970
2971 void InstructionCodeGeneratorX86::VisitInvokeCustom(HInvokeCustom* invoke) {
2972 codegen_->GenerateInvokeCustomCall(invoke);
2973 }
2974
2975 void LocationsBuilderX86::VisitNeg(HNeg* neg) {
2976 LocationSummary* locations =
2977 new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
2978 switch (neg->GetResultType()) {
2979 case DataType::Type::kInt32:
2980 case DataType::Type::kInt64:
2981 locations->SetInAt(0, Location::RequiresRegister());
2982 locations->SetOut(Location::SameAsFirstInput());
2983 break;
2984
2985 case DataType::Type::kFloat32:
2986 locations->SetInAt(0, Location::RequiresFpuRegister());
2987 locations->SetOut(Location::SameAsFirstInput());
2988 locations->AddTemp(Location::RequiresRegister());
2989 locations->AddTemp(Location::RequiresFpuRegister());
2990 break;
2991
2992 case DataType::Type::kFloat64:
2993 locations->SetInAt(0, Location::RequiresFpuRegister());
2994 locations->SetOut(Location::SameAsFirstInput());
2995 locations->AddTemp(Location::RequiresFpuRegister());
2996 break;
2997
2998 default:
2999 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
3000 }
3001 }
3002
3003 void InstructionCodeGeneratorX86::VisitNeg(HNeg* neg) {
3004 LocationSummary* locations = neg->GetLocations();
3005 Location out = locations->Out();
3006 Location in = locations->InAt(0);
3007 switch (neg->GetResultType()) {
3008 case DataType::Type::kInt32:
3009 DCHECK(in.IsRegister());
3010 DCHECK(in.Equals(out));
3011 __ negl(out.AsRegister<Register>());
3012 break;
3013
3014 case DataType::Type::kInt64:
3015 DCHECK(in.IsRegisterPair());
3016 DCHECK(in.Equals(out));
3017 __ negl(out.AsRegisterPairLow<Register>());
3018 // Negation is similar to subtraction from zero. The least
3019 // significant 32 bits trigger a borrow when they are different from
3020 // zero; to take that into account, add 1 to the most significant
3021 // 32 bits if the carry flag (CF) is set to 1 after the first NEGL
3022 // operation.
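      // For example (illustrative), negating 0x00000000'00000001:
      //   NEGL low : 0x00000001 -> 0xFFFFFFFF, CF = 1
      //   ADCL high: 0x00000000 + 0 + CF      -> 0x00000001
      //   NEGL high: 0x00000001 -> 0xFFFFFFFF, giving 0xFFFFFFFF'FFFFFFFF (-1).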
3023 __ adcl(out.AsRegisterPairHigh<Register>(), Immediate(0));
3024 __ negl(out.AsRegisterPairHigh<Register>());
3025 break;
3026
3027 case DataType::Type::kFloat32: {
3028 DCHECK(in.Equals(out));
3029 Register constant = locations->GetTemp(0).AsRegister<Register>();
3030 XmmRegister mask = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
3031 // Implement float negation with an exclusive or with value
3032 // 0x80000000 (mask for bit 31, representing the sign of a
3033 // single-precision floating-point number).
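      // E.g. (illustrative): 2.0f is 0x40000000; 0x40000000 ^ 0x80000000 =
      // 0xC0000000, the bit pattern of -2.0f. Only the sign bit changes.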
3034 __ movl(constant, Immediate(INT32_C(0x80000000)));
3035 __ movd(mask, constant);
3036 __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
3037 break;
3038 }
3039
3040 case DataType::Type::kFloat64: {
3041 DCHECK(in.Equals(out));
3042 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
3043 // Implement double negation with an exclusive or with value
3044 // 0x8000000000000000 (mask for bit 63, representing the sign of
3045 // a double-precision floating-point number).
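      // E.g. (illustrative): 1.0 is 0x3FF0000000000000; XOR with the mask gives
      // 0xBFF0000000000000, the bit pattern of -1.0.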
3046 __ LoadLongConstant(mask, INT64_C(0x8000000000000000));
3047 __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
3048 break;
3049 }
3050
3051 default:
3052 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
3053 }
3054 }
3055
3056 void LocationsBuilderX86::VisitX86FPNeg(HX86FPNeg* neg) {
3057 LocationSummary* locations =
3058 new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
3059 DCHECK(DataType::IsFloatingPointType(neg->GetType()));
3060 locations->SetInAt(0, Location::RequiresFpuRegister());
3061 locations->SetInAt(1, Location::RequiresRegister());
3062 locations->SetOut(Location::SameAsFirstInput());
3063 locations->AddTemp(Location::RequiresFpuRegister());
3064 }
3065
3066 void InstructionCodeGeneratorX86::VisitX86FPNeg(HX86FPNeg* neg) {
3067 LocationSummary* locations = neg->GetLocations();
3068 Location out = locations->Out();
3069 DCHECK(locations->InAt(0).Equals(out));
3070
3071 Register constant_area = locations->InAt(1).AsRegister<Register>();
3072 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
3073 if (neg->GetType() == DataType::Type::kFloat32) {
3074 __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x80000000),
3075 neg->GetBaseMethodAddress(),
3076 constant_area));
3077 __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
3078 } else {
3079 __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000),
3080 neg->GetBaseMethodAddress(),
3081 constant_area));
3082 __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
3083 }
3084 }
3085
3086 void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) {
3087 DataType::Type result_type = conversion->GetResultType();
3088 DataType::Type input_type = conversion->GetInputType();
3089 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
3090 << input_type << " -> " << result_type;
3091
3092 // The float-to-long and double-to-long type conversions rely on a
3093 // call to the runtime.
3094 LocationSummary::CallKind call_kind =
3095 ((input_type == DataType::Type::kFloat32 || input_type == DataType::Type::kFloat64)
3096 && result_type == DataType::Type::kInt64)
3097 ? LocationSummary::kCallOnMainOnly
3098 : LocationSummary::kNoCall;
3099 LocationSummary* locations =
3100 new (GetGraph()->GetAllocator()) LocationSummary(conversion, call_kind);
3101
3102 switch (result_type) {
3103 case DataType::Type::kUint8:
3104 case DataType::Type::kInt8:
3105 switch (input_type) {
3106 case DataType::Type::kUint8:
3107 case DataType::Type::kInt8:
3108 case DataType::Type::kUint16:
3109 case DataType::Type::kInt16:
3110 case DataType::Type::kInt32:
3111 locations->SetInAt(0, Location::ByteRegisterOrConstant(ECX, conversion->InputAt(0)));
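          // (Note: only EAX, EBX, ECX and EDX have byte-addressable low halves on
          //  32-bit x86, hence the request for a byte register such as ECX.)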
3112 // Make the output overlap to please the register allocator. This greatly simplifies
3113 // the validation of the linear scan implementation.
3114 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
3115 break;
3116 case DataType::Type::kInt64: {
3117 HInstruction* input = conversion->InputAt(0);
3118 Location input_location = input->IsConstant()
3119 ? Location::ConstantLocation(input)
3120 : Location::RegisterPairLocation(EAX, EDX);
3121 locations->SetInAt(0, input_location);
3122 // Make the output overlap to please the register allocator. This greatly simplifies
3123 // the validation of the linear scan implementation.
3124 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
3125 break;
3126 }
3127
3128 default:
3129 LOG(FATAL) << "Unexpected type conversion from " << input_type
3130 << " to " << result_type;
3131 }
3132 break;
3133
3134 case DataType::Type::kUint16:
3135 case DataType::Type::kInt16:
3136 DCHECK(DataType::IsIntegralType(input_type)) << input_type;
3137 locations->SetInAt(0, Location::Any());
3138 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3139 break;
3140
3141 case DataType::Type::kInt32:
3142 switch (input_type) {
3143 case DataType::Type::kInt64:
3144 locations->SetInAt(0, Location::Any());
3145 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3146 break;
3147
3148 case DataType::Type::kFloat32:
3149 locations->SetInAt(0, Location::RequiresFpuRegister());
3150 locations->SetOut(Location::RequiresRegister());
3151 locations->AddTemp(Location::RequiresFpuRegister());
3152 break;
3153
3154 case DataType::Type::kFloat64:
3155 locations->SetInAt(0, Location::RequiresFpuRegister());
3156 locations->SetOut(Location::RequiresRegister());
3157 locations->AddTemp(Location::RequiresFpuRegister());
3158 break;
3159
3160 default:
3161 LOG(FATAL) << "Unexpected type conversion from " << input_type
3162 << " to " << result_type;
3163 }
3164 break;
3165
3166 case DataType::Type::kInt64:
3167 switch (input_type) {
3168 case DataType::Type::kBool:
3169 case DataType::Type::kUint8:
3170 case DataType::Type::kInt8:
3171 case DataType::Type::kUint16:
3172 case DataType::Type::kInt16:
3173 case DataType::Type::kInt32:
3174 locations->SetInAt(0, Location::RegisterLocation(EAX));
3175 locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
3176 break;
3177
3178 case DataType::Type::kFloat32:
3179 case DataType::Type::kFloat64: {
3180 InvokeRuntimeCallingConvention calling_convention;
3181 XmmRegister parameter = calling_convention.GetFpuRegisterAt(0);
3182 locations->SetInAt(0, Location::FpuRegisterLocation(parameter));
3183
3184 // The runtime helper puts the result in EAX, EDX.
3185 locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
3186 }
3187 break;
3188
3189 default:
3190 LOG(FATAL) << "Unexpected type conversion from " << input_type
3191 << " to " << result_type;
3192 }
3193 break;
3194
3195 case DataType::Type::kFloat32:
3196 switch (input_type) {
3197 case DataType::Type::kBool:
3198 case DataType::Type::kUint8:
3199 case DataType::Type::kInt8:
3200 case DataType::Type::kUint16:
3201 case DataType::Type::kInt16:
3202 case DataType::Type::kInt32:
3203 locations->SetInAt(0, Location::RequiresRegister());
3204 locations->SetOut(Location::RequiresFpuRegister());
3205 break;
3206
3207 case DataType::Type::kInt64:
3208 locations->SetInAt(0, Location::Any());
3209 locations->SetOut(Location::Any());
3210 break;
3211
3212 case DataType::Type::kFloat64:
3213 locations->SetInAt(0, Location::RequiresFpuRegister());
3214 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3215 break;
3216
3217 default:
3218 LOG(FATAL) << "Unexpected type conversion from " << input_type
3219 << " to " << result_type;
3220 }
3221 break;
3222
3223 case DataType::Type::kFloat64:
3224 switch (input_type) {
3225 case DataType::Type::kBool:
3226 case DataType::Type::kUint8:
3227 case DataType::Type::kInt8:
3228 case DataType::Type::kUint16:
3229 case DataType::Type::kInt16:
3230 case DataType::Type::kInt32:
3231 locations->SetInAt(0, Location::RequiresRegister());
3232 locations->SetOut(Location::RequiresFpuRegister());
3233 break;
3234
3235 case DataType::Type::kInt64:
3236 locations->SetInAt(0, Location::Any());
3237 locations->SetOut(Location::Any());
3238 break;
3239
3240 case DataType::Type::kFloat32:
3241 locations->SetInAt(0, Location::RequiresFpuRegister());
3242 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3243 break;
3244
3245 default:
3246 LOG(FATAL) << "Unexpected type conversion from " << input_type
3247 << " to " << result_type;
3248 }
3249 break;
3250
3251 default:
3252 LOG(FATAL) << "Unexpected type conversion from " << input_type
3253 << " to " << result_type;
3254 }
3255 }
3256
3257 void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversion) {
3258 LocationSummary* locations = conversion->GetLocations();
3259 Location out = locations->Out();
3260 Location in = locations->InAt(0);
3261 DataType::Type result_type = conversion->GetResultType();
3262 DataType::Type input_type = conversion->GetInputType();
3263 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
3264 << input_type << " -> " << result_type;
3265 switch (result_type) {
3266 case DataType::Type::kUint8:
3267 switch (input_type) {
3268 case DataType::Type::kInt8:
3269 case DataType::Type::kUint16:
3270 case DataType::Type::kInt16:
3271 case DataType::Type::kInt32:
3272 if (in.IsRegister()) {
3273 __ movzxb(out.AsRegister<Register>(), in.AsRegister<ByteRegister>());
3274 } else {
3275 DCHECK(in.GetConstant()->IsIntConstant());
3276 int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
3277 __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint8_t>(value)));
3278 }
3279 break;
3280 case DataType::Type::kInt64:
3281 if (in.IsRegisterPair()) {
3282 __ movzxb(out.AsRegister<Register>(), in.AsRegisterPairLow<ByteRegister>());
3283 } else {
3284 DCHECK(in.GetConstant()->IsLongConstant());
3285 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3286 __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint8_t>(value)));
3287 }
3288 break;
3289
3290 default:
3291 LOG(FATAL) << "Unexpected type conversion from " << input_type
3292 << " to " << result_type;
3293 }
3294 break;
3295
3296 case DataType::Type::kInt8:
3297 switch (input_type) {
3298 case DataType::Type::kUint8:
3299 case DataType::Type::kUint16:
3300 case DataType::Type::kInt16:
3301 case DataType::Type::kInt32:
3302 if (in.IsRegister()) {
3303 __ movsxb(out.AsRegister<Register>(), in.AsRegister<ByteRegister>());
3304 } else {
3305 DCHECK(in.GetConstant()->IsIntConstant());
3306 int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
3307 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int8_t>(value)));
3308 }
3309 break;
3310 case DataType::Type::kInt64:
3311 if (in.IsRegisterPair()) {
3312 __ movsxb(out.AsRegister<Register>(), in.AsRegisterPairLow<ByteRegister>());
3313 } else {
3314 DCHECK(in.GetConstant()->IsLongConstant());
3315 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3316 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int8_t>(value)));
3317 }
3318 break;
3319
3320 default:
3321 LOG(FATAL) << "Unexpected type conversion from " << input_type
3322 << " to " << result_type;
3323 }
3324 break;
3325
3326 case DataType::Type::kUint16:
3327 switch (input_type) {
3328 case DataType::Type::kInt8:
3329 case DataType::Type::kInt16:
3330 case DataType::Type::kInt32:
3331 if (in.IsRegister()) {
3332 __ movzxw(out.AsRegister<Register>(), in.AsRegister<Register>());
3333 } else if (in.IsStackSlot()) {
3334 __ movzxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
3335 } else {
3336 DCHECK(in.GetConstant()->IsIntConstant());
3337 int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
3338 __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint16_t>(value)));
3339 }
3340 break;
3341 case DataType::Type::kInt64:
3342 if (in.IsRegisterPair()) {
3343 __ movzxw(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
3344 } else if (in.IsDoubleStackSlot()) {
3345 __ movzxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
3346 } else {
3347 DCHECK(in.GetConstant()->IsLongConstant());
3348 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3349 __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint16_t>(value)));
3350 }
3351 break;
3352
3353 default:
3354 LOG(FATAL) << "Unexpected type conversion from " << input_type
3355 << " to " << result_type;
3356 }
3357 break;
3358
3359 case DataType::Type::kInt16:
3360 switch (input_type) {
3361 case DataType::Type::kUint16:
3362 case DataType::Type::kInt32:
3363 if (in.IsRegister()) {
3364 __ movsxw(out.AsRegister<Register>(), in.AsRegister<Register>());
3365 } else if (in.IsStackSlot()) {
3366 __ movsxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
3367 } else {
3368 DCHECK(in.GetConstant()->IsIntConstant());
3369 int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
3370 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int16_t>(value)));
3371 }
3372 break;
3373 case DataType::Type::kInt64:
3374 if (in.IsRegisterPair()) {
3375 __ movsxw(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
3376 } else if (in.IsDoubleStackSlot()) {
3377 __ movsxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
3378 } else {
3379 DCHECK(in.GetConstant()->IsLongConstant());
3380 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3381 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int16_t>(value)));
3382 }
3383 break;
3384
3385 default:
3386 LOG(FATAL) << "Unexpected type conversion from " << input_type
3387 << " to " << result_type;
3388 }
3389 break;
3390
3391 case DataType::Type::kInt32:
3392 switch (input_type) {
3393 case DataType::Type::kInt64:
3394 if (in.IsRegisterPair()) {
3395 __ movl(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
3396 } else if (in.IsDoubleStackSlot()) {
3397 __ movl(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
3398 } else {
3399 DCHECK(in.IsConstant());
3400 DCHECK(in.GetConstant()->IsLongConstant());
3401 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3402 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int32_t>(value)));
3403 }
3404 break;
3405
3406 case DataType::Type::kFloat32: {
3407 XmmRegister input = in.AsFpuRegister<XmmRegister>();
3408 Register output = out.AsRegister<Register>();
3409 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
3410 NearLabel done, nan;
3411
3412 __ movl(output, Immediate(kPrimIntMax));
3413 // temp = int-to-float(output)
3414 __ cvtsi2ss(temp, output);
3415 // if input >= temp goto done
3416 __ comiss(input, temp);
3417 __ j(kAboveEqual, &done);
3418 // if input == NaN goto nan
3419 __ j(kUnordered, &nan);
3420 // output = float-to-int-truncate(input)
3421 __ cvttss2si(output, input);
3422 __ jmp(&done);
3423 __ Bind(&nan);
3424 // output = 0
3425 __ xorl(output, output);
3426 __ Bind(&done);
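            // Overall (a sketch): inputs >= (float)kPrimIntMax produce kPrimIntMax,
            // NaN produces 0, and remaining values go through cvttss2si, which
            // truncates toward zero (and yields kPrimIntMin for too-negative inputs).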
3427 break;
3428 }
3429
3430 case DataType::Type::kFloat64: {
3431 XmmRegister input = in.AsFpuRegister<XmmRegister>();
3432 Register output = out.AsRegister<Register>();
3433 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
3434 NearLabel done, nan;
3435
3436 __ movl(output, Immediate(kPrimIntMax));
3437 // temp = int-to-double(output)
3438 __ cvtsi2sd(temp, output);
3439 // if input >= temp goto done
3440 __ comisd(input, temp);
3441 __ j(kAboveEqual, &done);
3442 // if input == NaN goto nan
3443 __ j(kUnordered, &nan);
3444 // output = double-to-int-truncate(input)
3445 __ cvttsd2si(output, input);
3446 __ jmp(&done);
3447 __ Bind(&nan);
3448 // output = 0
3449 __ xorl(output, output);
3450 __ Bind(&done);
3451 break;
3452 }
3453
3454 default:
3455 LOG(FATAL) << "Unexpected type conversion from " << input_type
3456 << " to " << result_type;
3457 }
3458 break;
3459
3460 case DataType::Type::kInt64:
3461 switch (input_type) {
3462 case DataType::Type::kBool:
3463 case DataType::Type::kUint8:
3464 case DataType::Type::kInt8:
3465 case DataType::Type::kUint16:
3466 case DataType::Type::kInt16:
3467 case DataType::Type::kInt32:
3468 DCHECK_EQ(out.AsRegisterPairLow<Register>(), EAX);
3469 DCHECK_EQ(out.AsRegisterPairHigh<Register>(), EDX);
3470 DCHECK_EQ(in.AsRegister<Register>(), EAX);
3471 __ cdq();
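        // (cdq sign-extends EAX into EDX, forming the 64-bit result in EDX:EAX.)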
3472 break;
3473
3474 case DataType::Type::kFloat32:
3475 codegen_->InvokeRuntime(kQuickF2l, conversion, conversion->GetDexPc());
3476 CheckEntrypointTypes<kQuickF2l, int64_t, float>();
3477 break;
3478
3479 case DataType::Type::kFloat64:
3480 codegen_->InvokeRuntime(kQuickD2l, conversion, conversion->GetDexPc());
3481 CheckEntrypointTypes<kQuickD2l, int64_t, double>();
3482 break;
3483
3484 default:
3485 LOG(FATAL) << "Unexpected type conversion from " << input_type
3486 << " to " << result_type;
3487 }
3488 break;
3489
3490 case DataType::Type::kFloat32:
3491 switch (input_type) {
3492 case DataType::Type::kBool:
3493 case DataType::Type::kUint8:
3494 case DataType::Type::kInt8:
3495 case DataType::Type::kUint16:
3496 case DataType::Type::kInt16:
3497 case DataType::Type::kInt32:
3498 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<Register>());
3499 break;
3500
3501 case DataType::Type::kInt64: {
3502 size_t adjustment = 0;
3503
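            // (Note: 32-bit x86 has no SSE conversion from a 64-bit integer, so the
            //  value is run through the x87 FPU via fild/fstps below.)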
3504 // Create stack space for the call to
3505 // InstructionCodeGeneratorX86::PushOntoFPStack and/or X86Assembler::fstps below.
3506 // TODO: enhance register allocator to ask for stack temporaries.
3507 if (!in.IsDoubleStackSlot() || !out.IsStackSlot()) {
3508 adjustment = DataType::Size(DataType::Type::kInt64);
3509 codegen_->IncreaseFrame(adjustment);
3510 }
3511
3512 // Load the value to the FP stack, using temporaries if needed.
3513 PushOntoFPStack(in, 0, adjustment, false, true);
3514
3515 if (out.IsStackSlot()) {
3516 __ fstps(Address(ESP, out.GetStackIndex() + adjustment));
3517 } else {
3518 __ fstps(Address(ESP, 0));
3519 Location stack_temp = Location::StackSlot(0);
3520 codegen_->Move32(out, stack_temp);
3521 }
3522
3523 // Remove the temporary stack space we allocated.
3524 if (adjustment != 0) {
3525 codegen_->DecreaseFrame(adjustment);
3526 }
3527 break;
3528 }
3529
3530 case DataType::Type::kFloat64:
3531 __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3532 break;
3533
3534 default:
3535 LOG(FATAL) << "Unexpected type conversion from " << input_type
3536 << " to " << result_type;
3537 }
3538 break;
3539
3540 case DataType::Type::kFloat64:
3541 switch (input_type) {
3542 case DataType::Type::kBool:
3543 case DataType::Type::kUint8:
3544 case DataType::Type::kInt8:
3545 case DataType::Type::kUint16:
3546 case DataType::Type::kInt16:
3547 case DataType::Type::kInt32:
3548 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<Register>());
3549 break;
3550
3551 case DataType::Type::kInt64: {
3552 size_t adjustment = 0;
3553
3554 // Create stack space for the call to
3555 // InstructionCodeGeneratorX86::PushOntoFPStack and/or X86Assembler::fstpl below.
3556 // TODO: enhance register allocator to ask for stack temporaries.
3557 if (!in.IsDoubleStackSlot() || !out.IsDoubleStackSlot()) {
3558 adjustment = DataType::Size(DataType::Type::kInt64);
3559 codegen_->IncreaseFrame(adjustment);
3560 }
3561
3562 // Load the value to the FP stack, using temporaries if needed.
3563 PushOntoFPStack(in, 0, adjustment, false, true);
3564
3565 if (out.IsDoubleStackSlot()) {
3566 __ fstpl(Address(ESP, out.GetStackIndex() + adjustment));
3567 } else {
3568 __ fstpl(Address(ESP, 0));
3569 Location stack_temp = Location::DoubleStackSlot(0);
3570 codegen_->Move64(out, stack_temp);
3571 }
3572
3573 // Remove the temporary stack space we allocated.
3574 if (adjustment != 0) {
3575 codegen_->DecreaseFrame(adjustment);
3576 }
3577 break;
3578 }
3579
3580 case DataType::Type::kFloat32:
3581 __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3582 break;
3583
3584 default:
3585 LOG(FATAL) << "Unexpected type conversion from " << input_type
3586 << " to " << result_type;
3587 }
3588 break;
3589
3590 default:
3591 LOG(FATAL) << "Unexpected type conversion from " << input_type
3592 << " to " << result_type;
3593 }
3594 }
3595
3596 void LocationsBuilderX86::VisitAdd(HAdd* add) {
3597 LocationSummary* locations =
3598 new (GetGraph()->GetAllocator()) LocationSummary(add, LocationSummary::kNoCall);
3599 switch (add->GetResultType()) {
3600 case DataType::Type::kInt32: {
3601 locations->SetInAt(0, Location::RequiresRegister());
3602 locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
3603 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3604 break;
3605 }
3606
3607 case DataType::Type::kInt64: {
3608 locations->SetInAt(0, Location::RequiresRegister());
3609 locations->SetInAt(1, Location::Any());
3610 locations->SetOut(Location::SameAsFirstInput());
3611 break;
3612 }
3613
3614 case DataType::Type::kFloat32:
3615 case DataType::Type::kFloat64: {
3616 locations->SetInAt(0, Location::RequiresFpuRegister());
3617 if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
3618 DCHECK(add->InputAt(1)->IsEmittedAtUseSite());
3619 } else if (add->InputAt(1)->IsConstant()) {
3620 locations->SetInAt(1, Location::RequiresFpuRegister());
3621 } else {
3622 locations->SetInAt(1, Location::Any());
3623 }
3624 locations->SetOut(Location::SameAsFirstInput());
3625 break;
3626 }
3627
3628 default:
3629 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3630 UNREACHABLE();
3631 }
3632 }
3633
3634 void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) {
3635 LocationSummary* locations = add->GetLocations();
3636 Location first = locations->InAt(0);
3637 Location second = locations->InAt(1);
3638 Location out = locations->Out();
3639
3640 switch (add->GetResultType()) {
3641 case DataType::Type::kInt32: {
3642 if (second.IsRegister()) {
3643 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3644 __ addl(out.AsRegister<Register>(), second.AsRegister<Register>());
3645 } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3646 __ addl(out.AsRegister<Register>(), first.AsRegister<Register>());
3647 } else {
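              // leal computes first + second into a fresh output register without
              // clobbering either input (TIMES_1 scale, zero displacement).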
3648 __ leal(out.AsRegister<Register>(), Address(
3649 first.AsRegister<Register>(), second.AsRegister<Register>(), TIMES_1, 0));
3650 }
3651 } else if (second.IsConstant()) {
3652 int32_t value = second.GetConstant()->AsIntConstant()->GetValue();
3653 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3654 __ addl(out.AsRegister<Register>(), Immediate(value));
3655 } else {
3656 __ leal(out.AsRegister<Register>(), Address(first.AsRegister<Register>(), value));
3657 }
3658 } else {
3659 DCHECK(first.Equals(locations->Out()));
3660 __ addl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
3661 }
3662 break;
3663 }
3664
3665 case DataType::Type::kInt64: {
3666 if (second.IsRegisterPair()) {
3667 __ addl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
3668 __ adcl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
3669 } else if (second.IsDoubleStackSlot()) {
3670 __ addl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
3671 __ adcl(first.AsRegisterPairHigh<Register>(),
3672 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
3673 } else {
3674 DCHECK(second.IsConstant()) << second;
3675 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3676 __ addl(first.AsRegisterPairLow<Register>(), Immediate(Low32Bits(value)));
3677 __ adcl(first.AsRegisterPairHigh<Register>(), Immediate(High32Bits(value)));
3678 }
3679 break;
3680 }
3681
3682 case DataType::Type::kFloat32: {
3683 if (second.IsFpuRegister()) {
3684 __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3685 } else if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
3686 HX86LoadFromConstantTable* const_area = add->InputAt(1)->AsX86LoadFromConstantTable();
3687 DCHECK(const_area->IsEmittedAtUseSite());
3688 __ addss(first.AsFpuRegister<XmmRegister>(),
3689 codegen_->LiteralFloatAddress(
3690 const_area->GetConstant()->AsFloatConstant()->GetValue(),
3691 const_area->GetBaseMethodAddress(),
3692 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3693 } else {
3694 DCHECK(second.IsStackSlot());
3695 __ addss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3696 }
3697 break;
3698 }
3699
3700 case DataType::Type::kFloat64: {
3701 if (second.IsFpuRegister()) {
3702 __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3703 } else if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
3704 HX86LoadFromConstantTable* const_area = add->InputAt(1)->AsX86LoadFromConstantTable();
3705 DCHECK(const_area->IsEmittedAtUseSite());
3706 __ addsd(first.AsFpuRegister<XmmRegister>(),
3707 codegen_->LiteralDoubleAddress(
3708 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3709 const_area->GetBaseMethodAddress(),
3710 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3711 } else {
3712 DCHECK(second.IsDoubleStackSlot());
3713 __ addsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3714 }
3715 break;
3716 }
3717
3718 default:
3719 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3720 }
3721 }
3722
3723 void LocationsBuilderX86::VisitSub(HSub* sub) {
3724 LocationSummary* locations =
3725 new (GetGraph()->GetAllocator()) LocationSummary(sub, LocationSummary::kNoCall);
3726 switch (sub->GetResultType()) {
3727 case DataType::Type::kInt32:
3728 case DataType::Type::kInt64: {
3729 locations->SetInAt(0, Location::RequiresRegister());
3730 locations->SetInAt(1, Location::Any());
3731 locations->SetOut(Location::SameAsFirstInput());
3732 break;
3733 }
3734 case DataType::Type::kFloat32:
3735 case DataType::Type::kFloat64: {
3736 locations->SetInAt(0, Location::RequiresFpuRegister());
3737 if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
3738 DCHECK(sub->InputAt(1)->IsEmittedAtUseSite());
3739 } else if (sub->InputAt(1)->IsConstant()) {
3740 locations->SetInAt(1, Location::RequiresFpuRegister());
3741 } else {
3742 locations->SetInAt(1, Location::Any());
3743 }
3744 locations->SetOut(Location::SameAsFirstInput());
3745 break;
3746 }
3747
3748 default:
3749 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3750 }
3751 }
3752
3753 void InstructionCodeGeneratorX86::VisitSub(HSub* sub) {
3754 LocationSummary* locations = sub->GetLocations();
3755 Location first = locations->InAt(0);
3756 Location second = locations->InAt(1);
3757 DCHECK(first.Equals(locations->Out()));
3758 switch (sub->GetResultType()) {
3759 case DataType::Type::kInt32: {
3760 if (second.IsRegister()) {
3761 __ subl(first.AsRegister<Register>(), second.AsRegister<Register>());
3762 } else if (second.IsConstant()) {
3763 __ subl(first.AsRegister<Register>(),
3764 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
3765 } else {
3766 __ subl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
3767 }
3768 break;
3769 }
3770
3771 case DataType::Type::kInt64: {
3772 if (second.IsRegisterPair()) {
3773 __ subl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
3774 __ sbbl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
3775 } else if (second.IsDoubleStackSlot()) {
3776 __ subl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
3777 __ sbbl(first.AsRegisterPairHigh<Register>(),
3778 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
3779 } else {
3780 DCHECK(second.IsConstant()) << second;
3781 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3782 __ subl(first.AsRegisterPairLow<Register>(), Immediate(Low32Bits(value)));
3783 __ sbbl(first.AsRegisterPairHigh<Register>(), Immediate(High32Bits(value)));
3784 }
3785 break;
3786 }
3787
3788 case DataType::Type::kFloat32: {
3789 if (second.IsFpuRegister()) {
3790 __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3791 } else if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
3792 HX86LoadFromConstantTable* const_area = sub->InputAt(1)->AsX86LoadFromConstantTable();
3793 DCHECK(const_area->IsEmittedAtUseSite());
3794 __ subss(first.AsFpuRegister<XmmRegister>(),
3795 codegen_->LiteralFloatAddress(
3796 const_area->GetConstant()->AsFloatConstant()->GetValue(),
3797 const_area->GetBaseMethodAddress(),
3798 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3799 } else {
3800 DCHECK(second.IsStackSlot());
3801 __ subss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3802 }
3803 break;
3804 }
3805
3806 case DataType::Type::kFloat64: {
3807 if (second.IsFpuRegister()) {
3808 __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3809 } else if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
3810 HX86LoadFromConstantTable* const_area = sub->InputAt(1)->AsX86LoadFromConstantTable();
3811 DCHECK(const_area->IsEmittedAtUseSite());
3812 __ subsd(first.AsFpuRegister<XmmRegister>(),
3813 codegen_->LiteralDoubleAddress(
3814 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3815 const_area->GetBaseMethodAddress(),
3816 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3817 } else {
3818 DCHECK(second.IsDoubleStackSlot());
3819 __ subsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3820 }
3821 break;
3822 }
3823
3824 default:
3825 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3826 }
3827 }
3828
3829 void LocationsBuilderX86::VisitMul(HMul* mul) {
3830 LocationSummary* locations =
3831 new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
3832 switch (mul->GetResultType()) {
3833 case DataType::Type::kInt32:
3834 locations->SetInAt(0, Location::RequiresRegister());
3835 locations->SetInAt(1, Location::Any());
3836 if (mul->InputAt(1)->IsIntConstant()) {
3837 // Can use 3 operand multiply.
3838 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3839 } else {
3840 locations->SetOut(Location::SameAsFirstInput());
3841 }
3842 break;
3843 case DataType::Type::kInt64: {
3844 locations->SetInAt(0, Location::RequiresRegister());
3845 locations->SetInAt(1, Location::Any());
3846 locations->SetOut(Location::SameAsFirstInput());
3847 // Needed for mull, the 32x32->64-bit multiply, which writes its result to EDX:EAX.
3848 locations->AddTemp(Location::RegisterLocation(EAX));
3849 locations->AddTemp(Location::RegisterLocation(EDX));
3850 break;
3851 }
3852 case DataType::Type::kFloat32:
3853 case DataType::Type::kFloat64: {
3854 locations->SetInAt(0, Location::RequiresFpuRegister());
3855 if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
3856 DCHECK(mul->InputAt(1)->IsEmittedAtUseSite());
3857 } else if (mul->InputAt(1)->IsConstant()) {
3858 locations->SetInAt(1, Location::RequiresFpuRegister());
3859 } else {
3860 locations->SetInAt(1, Location::Any());
3861 }
3862 locations->SetOut(Location::SameAsFirstInput());
3863 break;
3864 }
3865
3866 default:
3867 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3868 }
3869 }
3870
3871 void InstructionCodeGeneratorX86::VisitMul(HMul* mul) {
3872 LocationSummary* locations = mul->GetLocations();
3873 Location first = locations->InAt(0);
3874 Location second = locations->InAt(1);
3875 Location out = locations->Out();
3876
3877 switch (mul->GetResultType()) {
3878 case DataType::Type::kInt32:
3879 // The constant may have ended up in a register, so test explicitly to avoid
3880 // problems where the output may not be the same as the first operand.
3881 if (mul->InputAt(1)->IsIntConstant()) {
3882 Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
3883 __ imull(out.AsRegister<Register>(), first.AsRegister<Register>(), imm);
3884 } else if (second.IsRegister()) {
3885 DCHECK(first.Equals(out));
3886 __ imull(first.AsRegister<Register>(), second.AsRegister<Register>());
3887 } else {
3888 DCHECK(second.IsStackSlot());
3889 DCHECK(first.Equals(out));
3890 __ imull(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
3891 }
3892 break;
3893
3894 case DataType::Type::kInt64: {
3895 Register in1_hi = first.AsRegisterPairHigh<Register>();
3896 Register in1_lo = first.AsRegisterPairLow<Register>();
3897 Register eax = locations->GetTemp(0).AsRegister<Register>();
3898 Register edx = locations->GetTemp(1).AsRegister<Register>();
3899
3900 DCHECK_EQ(EAX, eax);
3901 DCHECK_EQ(EDX, edx);
3902
3903 // input: in1 - 64 bits, in2 - 64 bits.
3904 // output: in1
3905 // formula: in1.hi : in1.lo = (in1.lo * in2.hi + in1.hi * in2.lo)* 2^32 + in1.lo * in2.lo
3906 // parts: in1.hi = in1.lo * in2.hi + in1.hi * in2.lo + (in1.lo * in2.lo)[63:32]
3907 // parts: in1.lo = (in1.lo * in2.lo)[31:0]
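      // E.g. (illustrative): 0x00000001'00000000 * 2: lo*lo = 0, lo*hi = 0,
      // hi*lo = 2, so the result is hi = 2, lo = 0, i.e. 2^33.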
3908 if (second.IsConstant()) {
3909 DCHECK(second.GetConstant()->IsLongConstant());
3910
3911 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3912 int32_t low_value = Low32Bits(value);
3913 int32_t high_value = High32Bits(value);
3914 Immediate low(low_value);
3915 Immediate high(high_value);
3916
3917 __ movl(eax, high);
3918 // eax <- in1.lo * in2.hi
3919 __ imull(eax, in1_lo);
3920 // in1.hi <- in1.hi * in2.lo
3921 __ imull(in1_hi, low);
3922 // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
3923 __ addl(in1_hi, eax);
3924 // move in2_lo to eax to prepare for double precision
3925 __ movl(eax, low);
3926 // edx:eax <- in1.lo * in2.lo
3927 __ mull(in1_lo);
3928 // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
3929 __ addl(in1_hi, edx);
3930 // in1.lo <- (in1.lo * in2.lo)[31:0];
3931 __ movl(in1_lo, eax);
3932 } else if (second.IsRegisterPair()) {
3933 Register in2_hi = second.AsRegisterPairHigh<Register>();
3934 Register in2_lo = second.AsRegisterPairLow<Register>();
3935
3936 __ movl(eax, in2_hi);
3937 // eax <- in1.lo * in2.hi
3938 __ imull(eax, in1_lo);
3939 // in1.hi <- in1.hi * in2.lo
3940 __ imull(in1_hi, in2_lo);
3941 // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
3942 __ addl(in1_hi, eax);
3943 // move in1_lo to eax to prepare for double precision
3944 __ movl(eax, in1_lo);
3945 // edx:eax <- in1.lo * in2.lo
3946 __ mull(in2_lo);
3947 // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
3948 __ addl(in1_hi, edx);
3949 // in1.lo <- (in1.lo * in2.lo)[31:0];
3950 __ movl(in1_lo, eax);
3951 } else {
3952 DCHECK(second.IsDoubleStackSlot()) << second;
3953 Address in2_hi(ESP, second.GetHighStackIndex(kX86WordSize));
3954 Address in2_lo(ESP, second.GetStackIndex());
3955
3956 __ movl(eax, in2_hi);
3957 // eax <- in1.lo * in2.hi
3958 __ imull(eax, in1_lo);
3959 // in1.hi <- in1.hi * in2.lo
3960 __ imull(in1_hi, in2_lo);
3961 // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
3962 __ addl(in1_hi, eax);
3963 // move in1_lo to eax to prepare for double precision
3964 __ movl(eax, in1_lo);
3965 // edx:eax <- in1.lo * in2.lo
3966 __ mull(in2_lo);
3967 // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
3968 __ addl(in1_hi, edx);
3969 // in1.lo <- (in1.lo * in2.lo)[31:0];
3970 __ movl(in1_lo, eax);
3971 }
3972
3973 break;
3974 }
3975
3976 case DataType::Type::kFloat32: {
3977 DCHECK(first.Equals(locations->Out()));
3978 if (second.IsFpuRegister()) {
3979 __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3980 } else if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
3981 HX86LoadFromConstantTable* const_area = mul->InputAt(1)->AsX86LoadFromConstantTable();
3982 DCHECK(const_area->IsEmittedAtUseSite());
3983 __ mulss(first.AsFpuRegister<XmmRegister>(),
3984 codegen_->LiteralFloatAddress(
3985 const_area->GetConstant()->AsFloatConstant()->GetValue(),
3986 const_area->GetBaseMethodAddress(),
3987 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3988 } else {
3989 DCHECK(second.IsStackSlot());
3990 __ mulss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3991 }
3992 break;
3993 }
3994
3995 case DataType::Type::kFloat64: {
3996 DCHECK(first.Equals(locations->Out()));
3997 if (second.IsFpuRegister()) {
3998 __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3999 } else if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
4000 HX86LoadFromConstantTable* const_area = mul->InputAt(1)->AsX86LoadFromConstantTable();
4001 DCHECK(const_area->IsEmittedAtUseSite());
4002 __ mulsd(first.AsFpuRegister<XmmRegister>(),
4003 codegen_->LiteralDoubleAddress(
4004 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
4005 const_area->GetBaseMethodAddress(),
4006 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
4007 } else {
4008 DCHECK(second.IsDoubleStackSlot());
4009 __ mulsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
4010 }
4011 break;
4012 }
4013
4014 default:
4015 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
4016 }
4017 }
4018
4019 void InstructionCodeGeneratorX86::PushOntoFPStack(Location source,
4020 uint32_t temp_offset,
4021 uint32_t stack_adjustment,
4022 bool is_fp,
4023 bool is_wide) {
4024 if (source.IsStackSlot()) {
4025 DCHECK(!is_wide);
4026 if (is_fp) {
4027 __ flds(Address(ESP, source.GetStackIndex() + stack_adjustment));
4028 } else {
4029 __ filds(Address(ESP, source.GetStackIndex() + stack_adjustment));
4030 }
4031 } else if (source.IsDoubleStackSlot()) {
4032 DCHECK(is_wide);
4033 if (is_fp) {
4034 __ fldl(Address(ESP, source.GetStackIndex() + stack_adjustment));
4035 } else {
4036 __ fildl(Address(ESP, source.GetStackIndex() + stack_adjustment));
4037 }
4038 } else {
4039 // Write the value to the temporary location on the stack and load to FP stack.
4040 if (!is_wide) {
4041 Location stack_temp = Location::StackSlot(temp_offset);
4042 codegen_->Move32(stack_temp, source);
4043 if (is_fp) {
4044 __ flds(Address(ESP, temp_offset));
4045 } else {
4046 __ filds(Address(ESP, temp_offset));
4047 }
4048 } else {
4049 Location stack_temp = Location::DoubleStackSlot(temp_offset);
4050 codegen_->Move64(stack_temp, source);
4051 if (is_fp) {
4052 __ fldl(Address(ESP, temp_offset));
4053 } else {
4054 __ fildl(Address(ESP, temp_offset));
4055 }
4056 }
4057 }
4058 }
4059
4060 void InstructionCodeGeneratorX86::GenerateRemFP(HRem *rem) {
4061 DataType::Type type = rem->GetResultType();
4062 bool is_float = type == DataType::Type::kFloat32;
4063 size_t elem_size = DataType::Size(type);
4064 LocationSummary* locations = rem->GetLocations();
4065 Location first = locations->InAt(0);
4066 Location second = locations->InAt(1);
4067 Location out = locations->Out();
4068
4069 // Create stack space for 2 elements.
4070 // TODO: enhance register allocator to ask for stack temporaries.
4071 codegen_->IncreaseFrame(2 * elem_size);
4072
4073 // Load the values to the FP stack in reverse order, using temporaries if needed.
4074 const bool is_wide = !is_float;
4075 PushOntoFPStack(second, elem_size, 2 * elem_size, /* is_fp= */ true, is_wide);
4076 PushOntoFPStack(first, 0, 2 * elem_size, /* is_fp= */ true, is_wide);
4077
4078 // Loop doing FPREM until we stabilize.
4079 NearLabel retry;
4080 __ Bind(&retry);
4081 __ fprem();
4082
4083 // Move FP status to AX.
4084 __ fstsw();
4085
4086 // And see if the argument reduction is complete. This is signaled by the
4087 // C2 FPU flag bit set to 0.
4088 __ andl(EAX, Immediate(kC2ConditionMask));
4089 __ j(kNotEqual, &retry);
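  // (fprem performs only a partial reduction when the exponents differ widely,
  //  so the loop repeats until the C2 flag reports a complete reduction.)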
4090
4091 // We have settled on the final value. Retrieve it into an XMM register.
4092 // Store FP top of stack to real stack.
4093 if (is_float) {
4094 __ fsts(Address(ESP, 0));
4095 } else {
4096 __ fstl(Address(ESP, 0));
4097 }
4098
4099 // Pop the 2 items from the FP stack.
4100 __ fucompp();
4101
4102 // Load the value from the stack into an XMM register.
4103 DCHECK(out.IsFpuRegister()) << out;
4104 if (is_float) {
4105 __ movss(out.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
4106 } else {
4107 __ movsd(out.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
4108 }
4109
4110 // And remove the temporary stack space we allocated.
4111 codegen_->DecreaseFrame(2 * elem_size);
4112 }
4113
4114
4115 void InstructionCodeGeneratorX86::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
4116 DCHECK(instruction->IsDiv() || instruction->IsRem());
4117
4118 LocationSummary* locations = instruction->GetLocations();
4119 DCHECK(locations->InAt(1).IsConstant());
4120 DCHECK(locations->InAt(1).GetConstant()->IsIntConstant());
4121
4122 Register out_register = locations->Out().AsRegister<Register>();
4123 Register input_register = locations->InAt(0).AsRegister<Register>();
4124 int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
4125
4126 DCHECK(imm == 1 || imm == -1);
4127
4128 if (instruction->IsRem()) {
4129 __ xorl(out_register, out_register);
4130 } else {
4131 __ movl(out_register, input_register);
4132 if (imm == -1) {
4133 __ negl(out_register);
4134 }
4135 }
4136 }
4137
4138 void InstructionCodeGeneratorX86::RemByPowerOfTwo(HRem* instruction) {
4139 LocationSummary* locations = instruction->GetLocations();
4140 Location second = locations->InAt(1);
4141
4142 Register out = locations->Out().AsRegister<Register>();
4143 Register numerator = locations->InAt(0).AsRegister<Register>();
4144
4145 int32_t imm = Int64FromConstant(second.GetConstant());
4146 DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
4147 uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
4148
4149 Register tmp = locations->GetTemp(0).AsRegister<Register>();
4150 NearLabel done;
4151 __ movl(out, numerator);
4152 __ andl(out, Immediate(abs_imm-1));
4153 __ j(Condition::kZero, &done);
4154 __ leal(tmp, Address(out, static_cast<int32_t>(~(abs_imm-1))));
4155 __ testl(numerator, numerator);
4156 __ cmovl(Condition::kLess, out, tmp);
4157 __ Bind(&done);
4158 }
4159
DivByPowerOfTwo(HDiv * instruction)4160 void InstructionCodeGeneratorX86::DivByPowerOfTwo(HDiv* instruction) {
4161 LocationSummary* locations = instruction->GetLocations();
4162
4163 Register out_register = locations->Out().AsRegister<Register>();
4164 Register input_register = locations->InAt(0).AsRegister<Register>();
4165 int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
4166 DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
4167 uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
4168
4169 Register num = locations->GetTemp(0).AsRegister<Register>();
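  // To make the arithmetic shift round toward zero (Java semantics) rather than toward
  // negative infinity, negative numerators are biased by abs_imm - 1 before shifting.
  // The leal/testl/cmovl sequence below selects the biased value only for negative inputs.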
4170
4171 __ leal(num, Address(input_register, abs_imm - 1));
4172 __ testl(input_register, input_register);
4173 __ cmovl(kGreaterEqual, num, input_register);
4174 int shift = CTZ(imm);
4175 __ sarl(num, Immediate(shift));
4176
4177 if (imm < 0) {
4178 __ negl(num);
4179 }
4180
4181 __ movl(out_register, num);
4182 }
4183
GenerateDivRemWithAnyConstant(HBinaryOperation * instruction)4184 void InstructionCodeGeneratorX86::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
4185 DCHECK(instruction->IsDiv() || instruction->IsRem());
4186
4187 LocationSummary* locations = instruction->GetLocations();
4188 int imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
4189
4190 Register eax = locations->InAt(0).AsRegister<Register>();
4191 Register out = locations->Out().AsRegister<Register>();
4192 Register num;
4193 Register edx;
4194
4195 if (instruction->IsDiv()) {
4196 edx = locations->GetTemp(0).AsRegister<Register>();
4197 num = locations->GetTemp(1).AsRegister<Register>();
4198 } else {
4199 edx = locations->Out().AsRegister<Register>();
4200 num = locations->GetTemp(0).AsRegister<Register>();
4201 }
4202
4203 DCHECK_EQ(EAX, eax);
4204 DCHECK_EQ(EDX, edx);
4205 if (instruction->IsDiv()) {
4206 DCHECK_EQ(EAX, out);
4207 } else {
4208 DCHECK_EQ(EDX, out);
4209 }
4210
4211 int64_t magic;
4212 int shift;
4213 CalculateMagicAndShiftForDivRem(imm, /* is_long= */ false, &magic, &shift);
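  // Illustrative values (standard signed-division magic constants, e.g. from Hacker's Delight):
  // for imm == 7 this yields magic == 0x92492493 and shift == 2, so the quotient is computed
  // below as q = (((magic * n) >> 32) + n) >> 2, plus the extracted sign bit.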
4214
4215 // Save the numerator.
4216 __ movl(num, eax);
4217
4218 // EAX = magic
4219 __ movl(eax, Immediate(magic));
4220
4221 // EDX:EAX = magic * numerator
4222 __ imull(num);
4223
4224 if (imm > 0 && magic < 0) {
4225 // EDX += num
4226 __ addl(edx, num);
4227 } else if (imm < 0 && magic > 0) {
4228 __ subl(edx, num);
4229 }
4230
4231 // Shift if needed.
4232 if (shift != 0) {
4233 __ sarl(edx, Immediate(shift));
4234 }
4235
4236 // EDX += 1 if EDX < 0
4237 __ movl(eax, edx);
4238 __ shrl(edx, Immediate(31));
4239 __ addl(edx, eax);
4240
4241 if (instruction->IsRem()) {
4242 __ movl(eax, num);
4243 __ imull(edx, Immediate(imm));
4244 __ subl(eax, edx);
4245 __ movl(edx, eax);
4246 } else {
4247 __ movl(eax, edx);
4248 }
4249 }
4250
GenerateDivRemIntegral(HBinaryOperation * instruction)4251 void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instruction) {
4252 DCHECK(instruction->IsDiv() || instruction->IsRem());
4253
4254 LocationSummary* locations = instruction->GetLocations();
4255 Location out = locations->Out();
4256 Location first = locations->InAt(0);
4257 Location second = locations->InAt(1);
4258 bool is_div = instruction->IsDiv();
4259
4260 switch (instruction->GetResultType()) {
4261 case DataType::Type::kInt32: {
4262 DCHECK_EQ(EAX, first.AsRegister<Register>());
4263 DCHECK_EQ(is_div ? EAX : EDX, out.AsRegister<Register>());
4264
4265 if (second.IsConstant()) {
4266 int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
4267
4268 if (imm == 0) {
4269        // Do not generate anything for a zero divisor; the DivZeroCheck will have thrown before this code is reached.
4270 } else if (imm == 1 || imm == -1) {
4271 DivRemOneOrMinusOne(instruction);
4272 } else if (IsPowerOfTwo(AbsOrMin(imm))) {
4273 if (is_div) {
4274 DivByPowerOfTwo(instruction->AsDiv());
4275 } else {
4276 RemByPowerOfTwo(instruction->AsRem());
4277 }
4278 } else {
4279 DCHECK(imm <= -2 || imm >= 2);
4280 GenerateDivRemWithAnyConstant(instruction);
4281 }
4282 } else {
4283 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) DivRemMinusOneSlowPathX86(
4284 instruction, out.AsRegister<Register>(), is_div);
4285 codegen_->AddSlowPath(slow_path);
4286
4287 Register second_reg = second.AsRegister<Register>();
4288 // 0x80000000/-1 triggers an arithmetic exception!
4289      // Dividing by -1 is actually negation and -0x80000000 wraps to 0x80000000, so
4290 // it's safe to just use negl instead of more complex comparisons.
4291
4292 __ cmpl(second_reg, Immediate(-1));
4293 __ j(kEqual, slow_path->GetEntryLabel());
4294
4295 // edx:eax <- sign-extended of eax
4296 __ cdq();
4297 // eax = quotient, edx = remainder
4298 __ idivl(second_reg);
4299 __ Bind(slow_path->GetExitLabel());
4300 }
4301 break;
4302 }
4303
4304 case DataType::Type::kInt64: {
4305 InvokeRuntimeCallingConvention calling_convention;
4306 DCHECK_EQ(calling_convention.GetRegisterAt(0), first.AsRegisterPairLow<Register>());
4307 DCHECK_EQ(calling_convention.GetRegisterAt(1), first.AsRegisterPairHigh<Register>());
4308 DCHECK_EQ(calling_convention.GetRegisterAt(2), second.AsRegisterPairLow<Register>());
4309 DCHECK_EQ(calling_convention.GetRegisterAt(3), second.AsRegisterPairHigh<Register>());
4310 DCHECK_EQ(EAX, out.AsRegisterPairLow<Register>());
4311 DCHECK_EQ(EDX, out.AsRegisterPairHigh<Register>());
4312
4313 if (is_div) {
4314 codegen_->InvokeRuntime(kQuickLdiv, instruction, instruction->GetDexPc());
4315 CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>();
4316 } else {
4317 codegen_->InvokeRuntime(kQuickLmod, instruction, instruction->GetDexPc());
4318 CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>();
4319 }
4320 break;
4321 }
4322
4323 default:
4324 LOG(FATAL) << "Unexpected type for GenerateDivRemIntegral " << instruction->GetResultType();
4325 }
4326 }
4327
VisitDiv(HDiv * div)4328 void LocationsBuilderX86::VisitDiv(HDiv* div) {
4329 LocationSummary::CallKind call_kind = (div->GetResultType() == DataType::Type::kInt64)
4330 ? LocationSummary::kCallOnMainOnly
4331 : LocationSummary::kNoCall;
4332 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(div, call_kind);
4333
4334 switch (div->GetResultType()) {
4335 case DataType::Type::kInt32: {
4336 locations->SetInAt(0, Location::RegisterLocation(EAX));
4337 locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
4338 locations->SetOut(Location::SameAsFirstInput());
4339      // The idivl instruction uses edx:eax as the dividend.
4340 locations->AddTemp(Location::RegisterLocation(EDX));
4341      // We need to save the numerator while we tweak EAX and EDX. As imul is used in a way
4342      // that forces its results into EAX and EDX, things are simpler if we also use EAX as
4343      // the output and request another temp.
4344 if (div->InputAt(1)->IsIntConstant()) {
4345 locations->AddTemp(Location::RequiresRegister());
4346 }
4347 break;
4348 }
4349 case DataType::Type::kInt64: {
4350 InvokeRuntimeCallingConvention calling_convention;
4351 locations->SetInAt(0, Location::RegisterPairLocation(
4352 calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
4353 locations->SetInAt(1, Location::RegisterPairLocation(
4354 calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
4355 // Runtime helper puts the result in EAX, EDX.
4356 locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
4357 break;
4358 }
4359 case DataType::Type::kFloat32:
4360 case DataType::Type::kFloat64: {
4361 locations->SetInAt(0, Location::RequiresFpuRegister());
4362 if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
4363 DCHECK(div->InputAt(1)->IsEmittedAtUseSite());
4364 } else if (div->InputAt(1)->IsConstant()) {
4365 locations->SetInAt(1, Location::RequiresFpuRegister());
4366 } else {
4367 locations->SetInAt(1, Location::Any());
4368 }
4369 locations->SetOut(Location::SameAsFirstInput());
4370 break;
4371 }
4372
4373 default:
4374 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4375 }
4376 }
4377
VisitDiv(HDiv * div)4378 void InstructionCodeGeneratorX86::VisitDiv(HDiv* div) {
4379 LocationSummary* locations = div->GetLocations();
4380 Location first = locations->InAt(0);
4381 Location second = locations->InAt(1);
4382
4383 switch (div->GetResultType()) {
4384 case DataType::Type::kInt32:
4385 case DataType::Type::kInt64: {
4386 GenerateDivRemIntegral(div);
4387 break;
4388 }
4389
4390 case DataType::Type::kFloat32: {
4391 if (second.IsFpuRegister()) {
4392 __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4393 } else if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
4394 HX86LoadFromConstantTable* const_area = div->InputAt(1)->AsX86LoadFromConstantTable();
4395 DCHECK(const_area->IsEmittedAtUseSite());
4396 __ divss(first.AsFpuRegister<XmmRegister>(),
4397 codegen_->LiteralFloatAddress(
4398 const_area->GetConstant()->AsFloatConstant()->GetValue(),
4399 const_area->GetBaseMethodAddress(),
4400 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
4401 } else {
4402 DCHECK(second.IsStackSlot());
4403 __ divss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
4404 }
4405 break;
4406 }
4407
4408 case DataType::Type::kFloat64: {
4409 if (second.IsFpuRegister()) {
4410 __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4411 } else if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
4412 HX86LoadFromConstantTable* const_area = div->InputAt(1)->AsX86LoadFromConstantTable();
4413 DCHECK(const_area->IsEmittedAtUseSite());
4414 __ divsd(first.AsFpuRegister<XmmRegister>(),
4415 codegen_->LiteralDoubleAddress(
4416 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
4417 const_area->GetBaseMethodAddress(),
4418 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
4419 } else {
4420 DCHECK(second.IsDoubleStackSlot());
4421 __ divsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
4422 }
4423 break;
4424 }
4425
4426 default:
4427 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4428 }
4429 }
4430
VisitRem(HRem * rem)4431 void LocationsBuilderX86::VisitRem(HRem* rem) {
4432 DataType::Type type = rem->GetResultType();
4433
4434 LocationSummary::CallKind call_kind = (rem->GetResultType() == DataType::Type::kInt64)
4435 ? LocationSummary::kCallOnMainOnly
4436 : LocationSummary::kNoCall;
4437 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(rem, call_kind);
4438
4439 switch (type) {
4440 case DataType::Type::kInt32: {
4441 locations->SetInAt(0, Location::RegisterLocation(EAX));
4442 locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
4443 locations->SetOut(Location::RegisterLocation(EDX));
4444      // We need to save the numerator while we tweak EAX and EDX. As imul is used in a way
4445      // that forces its results into EAX and EDX, things are simpler if we also use EDX as
4446      // the output and request another temp.
4447 if (rem->InputAt(1)->IsIntConstant()) {
4448 locations->AddTemp(Location::RequiresRegister());
4449 }
4450 break;
4451 }
4452 case DataType::Type::kInt64: {
4453 InvokeRuntimeCallingConvention calling_convention;
4454 locations->SetInAt(0, Location::RegisterPairLocation(
4455 calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
4456 locations->SetInAt(1, Location::RegisterPairLocation(
4457 calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
4458 // Runtime helper puts the result in EAX, EDX.
4459 locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
4460 break;
4461 }
4462 case DataType::Type::kFloat64:
4463 case DataType::Type::kFloat32: {
4464 locations->SetInAt(0, Location::Any());
4465 locations->SetInAt(1, Location::Any());
4466 locations->SetOut(Location::RequiresFpuRegister());
4467 locations->AddTemp(Location::RegisterLocation(EAX));
4468 break;
4469 }
4470
4471 default:
4472 LOG(FATAL) << "Unexpected rem type " << type;
4473 }
4474 }
4475
VisitRem(HRem * rem)4476 void InstructionCodeGeneratorX86::VisitRem(HRem* rem) {
4477 DataType::Type type = rem->GetResultType();
4478 switch (type) {
4479 case DataType::Type::kInt32:
4480 case DataType::Type::kInt64: {
4481 GenerateDivRemIntegral(rem);
4482 break;
4483 }
4484 case DataType::Type::kFloat32:
4485 case DataType::Type::kFloat64: {
4486 GenerateRemFP(rem);
4487 break;
4488 }
4489 default:
4490 LOG(FATAL) << "Unexpected rem type " << type;
4491 }
4492 }
4493
CreateMinMaxLocations(ArenaAllocator * allocator,HBinaryOperation * minmax)4494 static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
4495 LocationSummary* locations = new (allocator) LocationSummary(minmax);
4496 switch (minmax->GetResultType()) {
4497 case DataType::Type::kInt32:
4498 locations->SetInAt(0, Location::RequiresRegister());
4499 locations->SetInAt(1, Location::RequiresRegister());
4500 locations->SetOut(Location::SameAsFirstInput());
4501 break;
4502 case DataType::Type::kInt64:
4503 locations->SetInAt(0, Location::RequiresRegister());
4504 locations->SetInAt(1, Location::RequiresRegister());
4505 locations->SetOut(Location::SameAsFirstInput());
4506      // Temporary register used to perform the long subtract that sets the condition codes.
4507 locations->AddTemp(Location::RequiresRegister());
4508 break;
4509 case DataType::Type::kFloat32:
4510 locations->SetInAt(0, Location::RequiresFpuRegister());
4511 locations->SetInAt(1, Location::RequiresFpuRegister());
4512 locations->SetOut(Location::SameAsFirstInput());
4513 locations->AddTemp(Location::RequiresRegister());
4514 break;
4515 case DataType::Type::kFloat64:
4516 locations->SetInAt(0, Location::RequiresFpuRegister());
4517 locations->SetInAt(1, Location::RequiresFpuRegister());
4518 locations->SetOut(Location::SameAsFirstInput());
4519 break;
4520 default:
4521 LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
4522 }
4523 }
4524
GenerateMinMaxInt(LocationSummary * locations,bool is_min,DataType::Type type)4525 void InstructionCodeGeneratorX86::GenerateMinMaxInt(LocationSummary* locations,
4526 bool is_min,
4527 DataType::Type type) {
4528 Location op1_loc = locations->InAt(0);
4529 Location op2_loc = locations->InAt(1);
4530
4531 // Shortcut for same input locations.
4532 if (op1_loc.Equals(op2_loc)) {
4533 // Can return immediately, as op1_loc == out_loc.
4534 // Note: if we ever support separate registers, e.g., output into memory, we need to check for
4535 // a copy here.
4536 DCHECK(locations->Out().Equals(op1_loc));
4537 return;
4538 }
4539
4540 if (type == DataType::Type::kInt64) {
4541 // Need to perform a subtract to get the sign right.
4542 // op1 is already in the same location as the output.
4543 Location output = locations->Out();
4544 Register output_lo = output.AsRegisterPairLow<Register>();
4545 Register output_hi = output.AsRegisterPairHigh<Register>();
4546
4547 Register op2_lo = op2_loc.AsRegisterPairLow<Register>();
4548 Register op2_hi = op2_loc.AsRegisterPairHigh<Register>();
4549
4550    // The comparison is performed by subtracting the second operand from
4551    // the first operand and then setting the status flags in the same
4552    // manner as the SUB instruction.
4553 __ cmpl(output_lo, op2_lo);
4554
4555 // Now use a temp and the borrow to finish the subtraction of op2_hi.
4556 Register temp = locations->GetTemp(0).AsRegister<Register>();
4557 __ movl(temp, output_hi);
4558 __ sbbl(temp, op2_hi);
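    // The cmpl/sbbl pair above performs the full 64-bit signed subtraction for its flags only:
    // the borrow from the low words propagates into the high-word subtract, so the sign and
    // overflow flags now reflect the 64-bit comparison.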
4559
4560 // Now the condition code is correct.
4561 Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess;
4562 __ cmovl(cond, output_lo, op2_lo);
4563 __ cmovl(cond, output_hi, op2_hi);
4564 } else {
4565 DCHECK_EQ(type, DataType::Type::kInt32);
4566 Register out = locations->Out().AsRegister<Register>();
4567 Register op2 = op2_loc.AsRegister<Register>();
4568
4569 // (out := op1)
4570 // out <=? op2
4571 // if out is min jmp done
4572 // out := op2
4573 // done:
4574
4575 __ cmpl(out, op2);
4576 Condition cond = is_min ? Condition::kGreater : Condition::kLess;
4577 __ cmovl(cond, out, op2);
4578 }
4579 }
4580
GenerateMinMaxFP(LocationSummary * locations,bool is_min,DataType::Type type)4581 void InstructionCodeGeneratorX86::GenerateMinMaxFP(LocationSummary* locations,
4582 bool is_min,
4583 DataType::Type type) {
4584 Location op1_loc = locations->InAt(0);
4585 Location op2_loc = locations->InAt(1);
4586 Location out_loc = locations->Out();
4587 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
4588
4589 // Shortcut for same input locations.
4590 if (op1_loc.Equals(op2_loc)) {
4591 DCHECK(out_loc.Equals(op1_loc));
4592 return;
4593 }
4594
4595 // (out := op1)
4596 // out <=? op2
4597 // if Nan jmp Nan_label
4598 // if out is min jmp done
4599 // if op2 is min jmp op2_label
4600 // handle -0/+0
4601 // jmp done
4602 // Nan_label:
4603 // out := NaN
4604 // op2_label:
4605 // out := op2
4606 // done:
4607 //
4608 // This removes one jmp, but needs to copy one input (op1) to out.
4609 //
4610 // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?
4611
4612 XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
4613
4614 NearLabel nan, done, op2_label;
4615 if (type == DataType::Type::kFloat64) {
4616 __ ucomisd(out, op2);
4617 } else {
4618 DCHECK_EQ(type, DataType::Type::kFloat32);
4619 __ ucomiss(out, op2);
4620 }
4621
4622 __ j(Condition::kParityEven, &nan);
4623
4624 __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
4625 __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
4626
4627 // Handle 0.0/-0.0.
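  // At this point the operands compare equal, so they can only differ in the sign of zero:
  // OR-ing the bit patterns makes -0.0 win (the correct min), AND-ing makes +0.0 win (the
  // correct max).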
4628 if (is_min) {
4629 if (type == DataType::Type::kFloat64) {
4630 __ orpd(out, op2);
4631 } else {
4632 __ orps(out, op2);
4633 }
4634 } else {
4635 if (type == DataType::Type::kFloat64) {
4636 __ andpd(out, op2);
4637 } else {
4638 __ andps(out, op2);
4639 }
4640 }
4641 __ jmp(&done);
4642
4643 // NaN handling.
4644 __ Bind(&nan);
4645 if (type == DataType::Type::kFloat64) {
4646 // TODO: Use a constant from the constant table (requires extra input).
4647 __ LoadLongConstant(out, kDoubleNaN);
4648 } else {
4649 Register constant = locations->GetTemp(0).AsRegister<Register>();
4650 __ movl(constant, Immediate(kFloatNaN));
4651 __ movd(out, constant);
4652 }
4653 __ jmp(&done);
4654
4655 // out := op2;
4656 __ Bind(&op2_label);
4657 if (type == DataType::Type::kFloat64) {
4658 __ movsd(out, op2);
4659 } else {
4660 __ movss(out, op2);
4661 }
4662
4663 // Done.
4664 __ Bind(&done);
4665 }
4666
GenerateMinMax(HBinaryOperation * minmax,bool is_min)4667 void InstructionCodeGeneratorX86::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
4668 DataType::Type type = minmax->GetResultType();
4669 switch (type) {
4670 case DataType::Type::kInt32:
4671 case DataType::Type::kInt64:
4672 GenerateMinMaxInt(minmax->GetLocations(), is_min, type);
4673 break;
4674 case DataType::Type::kFloat32:
4675 case DataType::Type::kFloat64:
4676 GenerateMinMaxFP(minmax->GetLocations(), is_min, type);
4677 break;
4678 default:
4679 LOG(FATAL) << "Unexpected type for HMinMax " << type;
4680 }
4681 }
4682
VisitMin(HMin * min)4683 void LocationsBuilderX86::VisitMin(HMin* min) {
4684 CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
4685 }
4686
VisitMin(HMin * min)4687 void InstructionCodeGeneratorX86::VisitMin(HMin* min) {
4688 GenerateMinMax(min, /*is_min*/ true);
4689 }
4690
VisitMax(HMax * max)4691 void LocationsBuilderX86::VisitMax(HMax* max) {
4692 CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
4693 }
4694
VisitMax(HMax * max)4695 void InstructionCodeGeneratorX86::VisitMax(HMax* max) {
4696 GenerateMinMax(max, /*is_min*/ false);
4697 }
4698
VisitAbs(HAbs * abs)4699 void LocationsBuilderX86::VisitAbs(HAbs* abs) {
4700 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
4701 switch (abs->GetResultType()) {
4702 case DataType::Type::kInt32:
4703 locations->SetInAt(0, Location::RegisterLocation(EAX));
4704 locations->SetOut(Location::SameAsFirstInput());
4705 locations->AddTemp(Location::RegisterLocation(EDX));
4706 break;
4707 case DataType::Type::kInt64:
4708 locations->SetInAt(0, Location::RequiresRegister());
4709 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
4710 locations->AddTemp(Location::RequiresRegister());
4711 break;
4712 case DataType::Type::kFloat32:
4713 locations->SetInAt(0, Location::RequiresFpuRegister());
4714 locations->SetOut(Location::SameAsFirstInput());
4715 locations->AddTemp(Location::RequiresFpuRegister());
4716 locations->AddTemp(Location::RequiresRegister());
4717 break;
4718 case DataType::Type::kFloat64:
4719 locations->SetInAt(0, Location::RequiresFpuRegister());
4720 locations->SetOut(Location::SameAsFirstInput());
4721 locations->AddTemp(Location::RequiresFpuRegister());
4722 break;
4723 default:
4724 LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4725 }
4726 }
4727
VisitAbs(HAbs * abs)4728 void InstructionCodeGeneratorX86::VisitAbs(HAbs* abs) {
4729 LocationSummary* locations = abs->GetLocations();
4730 switch (abs->GetResultType()) {
4731 case DataType::Type::kInt32: {
4732 Register out = locations->Out().AsRegister<Register>();
4733 DCHECK_EQ(out, EAX);
4734 Register temp = locations->GetTemp(0).AsRegister<Register>();
4735 DCHECK_EQ(temp, EDX);
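      // Branch-free abs: after cdq, EDX holds 0 for a non-negative EAX and -1 for a negative
      // one. XOR-ing with that mask and then subtracting it negates EAX exactly when it was
      // negative, e.g. EAX = -5: EDX = -1, EAX ^ EDX = 4, 4 - (-1) = 5.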
4736 // Sign extend EAX into EDX.
4737 __ cdq();
4738 // XOR EAX with sign.
4739 __ xorl(EAX, EDX);
4740 // Subtract out sign to correct.
4741 __ subl(EAX, EDX);
4742 // The result is in EAX.
4743 break;
4744 }
4745 case DataType::Type::kInt64: {
4746 Location input = locations->InAt(0);
4747 Register input_lo = input.AsRegisterPairLow<Register>();
4748 Register input_hi = input.AsRegisterPairHigh<Register>();
4749 Location output = locations->Out();
4750 Register output_lo = output.AsRegisterPairLow<Register>();
4751 Register output_hi = output.AsRegisterPairHigh<Register>();
4752 Register temp = locations->GetTemp(0).AsRegister<Register>();
4753 // Compute the sign into the temporary.
4754 __ movl(temp, input_hi);
4755 __ sarl(temp, Immediate(31));
4756 // Store the sign into the output.
4757 __ movl(output_lo, temp);
4758 __ movl(output_hi, temp);
4759 // XOR the input to the output.
4760 __ xorl(output_lo, input_lo);
4761 __ xorl(output_hi, input_hi);
4762 // Subtract the sign.
4763 __ subl(output_lo, temp);
4764 __ sbbl(output_hi, temp);
4765 break;
4766 }
4767 case DataType::Type::kFloat32: {
4768 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4769 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4770 Register constant = locations->GetTemp(1).AsRegister<Register>();
4771 __ movl(constant, Immediate(INT32_C(0x7FFFFFFF)));
4772 __ movd(temp, constant);
4773 __ andps(out, temp);
4774 break;
4775 }
4776 case DataType::Type::kFloat64: {
4777 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4778 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4779 // TODO: Use a constant from the constant table (requires extra input).
4780 __ LoadLongConstant(temp, INT64_C(0x7FFFFFFFFFFFFFFF));
4781 __ andpd(out, temp);
4782 break;
4783 }
4784 default:
4785 LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4786 }
4787 }
4788
VisitDivZeroCheck(HDivZeroCheck * instruction)4789 void LocationsBuilderX86::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4790 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
4791 switch (instruction->GetType()) {
4792 case DataType::Type::kBool:
4793 case DataType::Type::kUint8:
4794 case DataType::Type::kInt8:
4795 case DataType::Type::kUint16:
4796 case DataType::Type::kInt16:
4797 case DataType::Type::kInt32: {
4798 locations->SetInAt(0, Location::Any());
4799 break;
4800 }
4801 case DataType::Type::kInt64: {
4802 locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
4803 if (!instruction->IsConstant()) {
4804 locations->AddTemp(Location::RequiresRegister());
4805 }
4806 break;
4807 }
4808 default:
4809 LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
4810 }
4811 }
4812
VisitDivZeroCheck(HDivZeroCheck * instruction)4813 void InstructionCodeGeneratorX86::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4814 SlowPathCode* slow_path =
4815 new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathX86(instruction);
4816 codegen_->AddSlowPath(slow_path);
4817
4818 LocationSummary* locations = instruction->GetLocations();
4819 Location value = locations->InAt(0);
4820
4821 switch (instruction->GetType()) {
4822 case DataType::Type::kBool:
4823 case DataType::Type::kUint8:
4824 case DataType::Type::kInt8:
4825 case DataType::Type::kUint16:
4826 case DataType::Type::kInt16:
4827 case DataType::Type::kInt32: {
4828 if (value.IsRegister()) {
4829 __ testl(value.AsRegister<Register>(), value.AsRegister<Register>());
4830 __ j(kEqual, slow_path->GetEntryLabel());
4831 } else if (value.IsStackSlot()) {
4832 __ cmpl(Address(ESP, value.GetStackIndex()), Immediate(0));
4833 __ j(kEqual, slow_path->GetEntryLabel());
4834 } else {
4835 DCHECK(value.IsConstant()) << value;
4836 if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
4837 __ jmp(slow_path->GetEntryLabel());
4838 }
4839 }
4840 break;
4841 }
4842 case DataType::Type::kInt64: {
4843 if (value.IsRegisterPair()) {
4844 Register temp = locations->GetTemp(0).AsRegister<Register>();
4845 __ movl(temp, value.AsRegisterPairLow<Register>());
4846 __ orl(temp, value.AsRegisterPairHigh<Register>());
4847 __ j(kEqual, slow_path->GetEntryLabel());
4848 } else {
4849 DCHECK(value.IsConstant()) << value;
4850 if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
4851 __ jmp(slow_path->GetEntryLabel());
4852 }
4853 }
4854 break;
4855 }
4856 default:
4857 LOG(FATAL) << "Unexpected type for HDivZeroCheck" << instruction->GetType();
4858 }
4859 }
4860
HandleShift(HBinaryOperation * op)4861 void LocationsBuilderX86::HandleShift(HBinaryOperation* op) {
4862 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4863
4864 LocationSummary* locations =
4865 new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall);
4866
4867 switch (op->GetResultType()) {
4868 case DataType::Type::kInt32:
4869 case DataType::Type::kInt64: {
4870 // Can't have Location::Any() and output SameAsFirstInput()
4871 locations->SetInAt(0, Location::RequiresRegister());
4872 // The shift count needs to be in CL or a constant.
4873 locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1)));
4874 locations->SetOut(Location::SameAsFirstInput());
4875 break;
4876 }
4877 default:
4878 LOG(FATAL) << "Unexpected op type " << op->GetResultType();
4879 }
4880 }
4881
HandleShift(HBinaryOperation * op)4882 void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) {
4883 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4884
4885 LocationSummary* locations = op->GetLocations();
4886 Location first = locations->InAt(0);
4887 Location second = locations->InAt(1);
4888 DCHECK(first.Equals(locations->Out()));
4889
4890 switch (op->GetResultType()) {
4891 case DataType::Type::kInt32: {
4892 DCHECK(first.IsRegister());
4893 Register first_reg = first.AsRegister<Register>();
4894 if (second.IsRegister()) {
4895 Register second_reg = second.AsRegister<Register>();
4896 DCHECK_EQ(ECX, second_reg);
4897 if (op->IsShl()) {
4898 __ shll(first_reg, second_reg);
4899 } else if (op->IsShr()) {
4900 __ sarl(first_reg, second_reg);
4901 } else {
4902 __ shrl(first_reg, second_reg);
4903 }
4904 } else {
4905 int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance;
4906 if (shift == 0) {
4907 return;
4908 }
4909 Immediate imm(shift);
4910 if (op->IsShl()) {
4911 __ shll(first_reg, imm);
4912 } else if (op->IsShr()) {
4913 __ sarl(first_reg, imm);
4914 } else {
4915 __ shrl(first_reg, imm);
4916 }
4917 }
4918 break;
4919 }
4920 case DataType::Type::kInt64: {
4921 if (second.IsRegister()) {
4922 Register second_reg = second.AsRegister<Register>();
4923 DCHECK_EQ(ECX, second_reg);
4924 if (op->IsShl()) {
4925 GenerateShlLong(first, second_reg);
4926 } else if (op->IsShr()) {
4927 GenerateShrLong(first, second_reg);
4928 } else {
4929 GenerateUShrLong(first, second_reg);
4930 }
4931 } else {
4932 // Shift by a constant.
4933 int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance;
4934 // Nothing to do if the shift is 0, as the input is already the output.
4935 if (shift != 0) {
4936 if (op->IsShl()) {
4937 GenerateShlLong(first, shift);
4938 } else if (op->IsShr()) {
4939 GenerateShrLong(first, shift);
4940 } else {
4941 GenerateUShrLong(first, shift);
4942 }
4943 }
4944 }
4945 break;
4946 }
4947 default:
4948 LOG(FATAL) << "Unexpected op type " << op->GetResultType();
4949 }
4950 }
4951
GenerateShlLong(const Location & loc,int shift)4952 void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, int shift) {
4953 Register low = loc.AsRegisterPairLow<Register>();
4954 Register high = loc.AsRegisterPairHigh<Register>();
4955 if (shift == 1) {
4956 // This is just an addition.
4957 __ addl(low, low);
4958 __ adcl(high, high);
4959 } else if (shift == 32) {
4960 // Shift by 32 is easy. High gets low, and low gets 0.
4961 codegen_->EmitParallelMoves(
4962 loc.ToLow(),
4963 loc.ToHigh(),
4964 DataType::Type::kInt32,
4965 Location::ConstantLocation(GetGraph()->GetIntConstant(0)),
4966 loc.ToLow(),
4967 DataType::Type::kInt32);
4968 } else if (shift > 32) {
4969 // Low part becomes 0. High part is low part << (shift-32).
4970 __ movl(high, low);
4971 __ shll(high, Immediate(shift - 32));
4972 __ xorl(low, low);
4973 } else {
4974 // Between 1 and 31.
4975 __ shld(high, low, Immediate(shift));
4976 __ shll(low, Immediate(shift));
4977 }
4978 }
4979
GenerateShlLong(const Location & loc,Register shifter)4980 void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register shifter) {
4981 NearLabel done;
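  // x86 shift instructions mask the count to 5 bits, so the shld/shll below shift by
  // (shifter & 31). If bit 5 of the count is set (a shift of 32 or more), the high word must
  // instead receive the shifted low word and the low word becomes zero; the testl/j/movl
  // sequence performs that fix-up.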
4982 __ shld(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>(), shifter);
4983 __ shll(loc.AsRegisterPairLow<Register>(), shifter);
4984 __ testl(shifter, Immediate(32));
4985 __ j(kEqual, &done);
4986 __ movl(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>());
4987 __ movl(loc.AsRegisterPairLow<Register>(), Immediate(0));
4988 __ Bind(&done);
4989 }
4990
GenerateShrLong(const Location & loc,int shift)4991 void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, int shift) {
4992 Register low = loc.AsRegisterPairLow<Register>();
4993 Register high = loc.AsRegisterPairHigh<Register>();
4994 if (shift == 32) {
4995 // Need to copy the sign.
4996 DCHECK_NE(low, high);
4997 __ movl(low, high);
4998 __ sarl(high, Immediate(31));
4999 } else if (shift > 32) {
5000 DCHECK_NE(low, high);
5001 // High part becomes sign. Low part is shifted by shift - 32.
5002 __ movl(low, high);
5003 __ sarl(high, Immediate(31));
5004 __ sarl(low, Immediate(shift - 32));
5005 } else {
5006 // Between 1 and 31.
5007 __ shrd(low, high, Immediate(shift));
5008 __ sarl(high, Immediate(shift));
5009 }
5010 }
5011
GenerateShrLong(const Location & loc,Register shifter)5012 void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register shifter) {
5013 NearLabel done;
5014 __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
5015 __ sarl(loc.AsRegisterPairHigh<Register>(), shifter);
5016 __ testl(shifter, Immediate(32));
5017 __ j(kEqual, &done);
5018 __ movl(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>());
5019 __ sarl(loc.AsRegisterPairHigh<Register>(), Immediate(31));
5020 __ Bind(&done);
5021 }
5022
GenerateUShrLong(const Location & loc,int shift)5023 void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, int shift) {
5024 Register low = loc.AsRegisterPairLow<Register>();
5025 Register high = loc.AsRegisterPairHigh<Register>();
5026 if (shift == 32) {
5027 // Shift by 32 is easy. Low gets high, and high gets 0.
5028 codegen_->EmitParallelMoves(
5029 loc.ToHigh(),
5030 loc.ToLow(),
5031 DataType::Type::kInt32,
5032 Location::ConstantLocation(GetGraph()->GetIntConstant(0)),
5033 loc.ToHigh(),
5034 DataType::Type::kInt32);
5035 } else if (shift > 32) {
5036 // Low part is high >> (shift - 32). High part becomes 0.
5037 __ movl(low, high);
5038 __ shrl(low, Immediate(shift - 32));
5039 __ xorl(high, high);
5040 } else {
5041 // Between 1 and 31.
5042 __ shrd(low, high, Immediate(shift));
5043 __ shrl(high, Immediate(shift));
5044 }
5045 }
5046
GenerateUShrLong(const Location & loc,Register shifter)5047 void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, Register shifter) {
5048 NearLabel done;
5049 __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
5050 __ shrl(loc.AsRegisterPairHigh<Register>(), shifter);
5051 __ testl(shifter, Immediate(32));
5052 __ j(kEqual, &done);
5053 __ movl(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>());
5054 __ movl(loc.AsRegisterPairHigh<Register>(), Immediate(0));
5055 __ Bind(&done);
5056 }
5057
VisitRor(HRor * ror)5058 void LocationsBuilderX86::VisitRor(HRor* ror) {
5059 LocationSummary* locations =
5060 new (GetGraph()->GetAllocator()) LocationSummary(ror, LocationSummary::kNoCall);
5061
5062 switch (ror->GetResultType()) {
5063 case DataType::Type::kInt64:
5064 // Add the temporary needed.
5065 locations->AddTemp(Location::RequiresRegister());
5066 FALLTHROUGH_INTENDED;
5067 case DataType::Type::kInt32:
5068 locations->SetInAt(0, Location::RequiresRegister());
5069 // The shift count needs to be in CL (unless it is a constant).
5070 locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, ror->InputAt(1)));
5071 locations->SetOut(Location::SameAsFirstInput());
5072 break;
5073 default:
5074 LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
5075 UNREACHABLE();
5076 }
5077 }
5078
VisitRor(HRor * ror)5079 void InstructionCodeGeneratorX86::VisitRor(HRor* ror) {
5080 LocationSummary* locations = ror->GetLocations();
5081 Location first = locations->InAt(0);
5082 Location second = locations->InAt(1);
5083
5084 if (ror->GetResultType() == DataType::Type::kInt32) {
5085 Register first_reg = first.AsRegister<Register>();
5086 if (second.IsRegister()) {
5087 Register second_reg = second.AsRegister<Register>();
5088 __ rorl(first_reg, second_reg);
5089 } else {
5090 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
5091 __ rorl(first_reg, imm);
5092 }
5093 return;
5094 }
5095
5096 DCHECK_EQ(ror->GetResultType(), DataType::Type::kInt64);
5097 Register first_reg_lo = first.AsRegisterPairLow<Register>();
5098 Register first_reg_hi = first.AsRegisterPairHigh<Register>();
5099 Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
5100 if (second.IsRegister()) {
5101 Register second_reg = second.AsRegister<Register>();
5102 DCHECK_EQ(second_reg, ECX);
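    // The two shrd instructions below rotate the 64-bit pair right by (ECX & 31); since shrd
    // masks its count to 5 bits, a rotation amount with bit 5 set (32 or more) additionally
    // needs the halves swapped, which the final testl/cmovl pair performs.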
5103 __ movl(temp_reg, first_reg_hi);
5104 __ shrd(first_reg_hi, first_reg_lo, second_reg);
5105 __ shrd(first_reg_lo, temp_reg, second_reg);
5106 __ movl(temp_reg, first_reg_hi);
5107 __ testl(second_reg, Immediate(32));
5108 __ cmovl(kNotEqual, first_reg_hi, first_reg_lo);
5109 __ cmovl(kNotEqual, first_reg_lo, temp_reg);
5110 } else {
5111 int32_t shift_amt = second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance;
5112 if (shift_amt == 0) {
5113 // Already fine.
5114 return;
5115 }
5116 if (shift_amt == 32) {
5117 // Just swap.
5118 __ movl(temp_reg, first_reg_lo);
5119 __ movl(first_reg_lo, first_reg_hi);
5120 __ movl(first_reg_hi, temp_reg);
5121 return;
5122 }
5123
5124 Immediate imm(shift_amt);
5125    // Save the contents of the low value.
5126 __ movl(temp_reg, first_reg_lo);
5127
5128 // Shift right into low, feeding bits from high.
5129 __ shrd(first_reg_lo, first_reg_hi, imm);
5130
5131 // Shift right into high, feeding bits from the original low.
5132 __ shrd(first_reg_hi, temp_reg, imm);
5133
5134 // Swap if needed.
5135 if (shift_amt > 32) {
5136 __ movl(temp_reg, first_reg_lo);
5137 __ movl(first_reg_lo, first_reg_hi);
5138 __ movl(first_reg_hi, temp_reg);
5139 }
5140 }
5141 }
5142
VisitShl(HShl * shl)5143 void LocationsBuilderX86::VisitShl(HShl* shl) {
5144 HandleShift(shl);
5145 }
5146
VisitShl(HShl * shl)5147 void InstructionCodeGeneratorX86::VisitShl(HShl* shl) {
5148 HandleShift(shl);
5149 }
5150
VisitShr(HShr * shr)5151 void LocationsBuilderX86::VisitShr(HShr* shr) {
5152 HandleShift(shr);
5153 }
5154
VisitShr(HShr * shr)5155 void InstructionCodeGeneratorX86::VisitShr(HShr* shr) {
5156 HandleShift(shr);
5157 }
5158
VisitUShr(HUShr * ushr)5159 void LocationsBuilderX86::VisitUShr(HUShr* ushr) {
5160 HandleShift(ushr);
5161 }
5162
VisitUShr(HUShr * ushr)5163 void InstructionCodeGeneratorX86::VisitUShr(HUShr* ushr) {
5164 HandleShift(ushr);
5165 }
5166
VisitNewInstance(HNewInstance * instruction)5167 void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) {
5168 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5169 instruction, LocationSummary::kCallOnMainOnly);
5170 locations->SetOut(Location::RegisterLocation(EAX));
5171 InvokeRuntimeCallingConvention calling_convention;
5172 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
5173 }
5174
VisitNewInstance(HNewInstance * instruction)5175 void InstructionCodeGeneratorX86::VisitNewInstance(HNewInstance* instruction) {
5176 codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
5177 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
5178 DCHECK(!codegen_->IsLeafMethod());
5179 }
5180
VisitNewArray(HNewArray * instruction)5181 void LocationsBuilderX86::VisitNewArray(HNewArray* instruction) {
5182 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5183 instruction, LocationSummary::kCallOnMainOnly);
5184 locations->SetOut(Location::RegisterLocation(EAX));
5185 InvokeRuntimeCallingConvention calling_convention;
5186 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
5187 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
5188 }
5189
VisitNewArray(HNewArray * instruction)5190 void InstructionCodeGeneratorX86::VisitNewArray(HNewArray* instruction) {
5191 // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
5192 QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
5193 codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
5194 CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
5195 DCHECK(!codegen_->IsLeafMethod());
5196 }
5197
VisitParameterValue(HParameterValue * instruction)5198 void LocationsBuilderX86::VisitParameterValue(HParameterValue* instruction) {
5199 LocationSummary* locations =
5200 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5201 Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
5202 if (location.IsStackSlot()) {
5203 location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5204 } else if (location.IsDoubleStackSlot()) {
5205 location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5206 }
5207 locations->SetOut(location);
5208 }
5209
VisitParameterValue(HParameterValue * instruction)5210 void InstructionCodeGeneratorX86::VisitParameterValue(
5211 [[maybe_unused]] HParameterValue* instruction) {}
5212
VisitCurrentMethod(HCurrentMethod * instruction)5213 void LocationsBuilderX86::VisitCurrentMethod(HCurrentMethod* instruction) {
5214 LocationSummary* locations =
5215 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5216 locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument));
5217 }
5218
VisitCurrentMethod(HCurrentMethod * instruction)5219 void InstructionCodeGeneratorX86::VisitCurrentMethod([[maybe_unused]] HCurrentMethod* instruction) {
5220 }
5221
VisitClassTableGet(HClassTableGet * instruction)5222 void LocationsBuilderX86::VisitClassTableGet(HClassTableGet* instruction) {
5223 LocationSummary* locations =
5224 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5225 locations->SetInAt(0, Location::RequiresRegister());
5226 locations->SetOut(Location::RequiresRegister());
5227 }
5228
VisitClassTableGet(HClassTableGet * instruction)5229 void InstructionCodeGeneratorX86::VisitClassTableGet(HClassTableGet* instruction) {
5230 LocationSummary* locations = instruction->GetLocations();
5231 if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
5232 uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
5233 instruction->GetIndex(), kX86PointerSize).SizeValue();
5234 __ movl(locations->Out().AsRegister<Register>(),
5235 Address(locations->InAt(0).AsRegister<Register>(), method_offset));
5236 } else {
5237 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
5238 instruction->GetIndex(), kX86PointerSize));
5239 __ movl(locations->Out().AsRegister<Register>(),
5240 Address(locations->InAt(0).AsRegister<Register>(),
5241 mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
5242 // temp = temp->GetImtEntryAt(method_offset);
5243 __ movl(locations->Out().AsRegister<Register>(),
5244 Address(locations->Out().AsRegister<Register>(), method_offset));
5245 }
5246 }
5247
VisitNot(HNot * not_)5248 void LocationsBuilderX86::VisitNot(HNot* not_) {
5249 LocationSummary* locations =
5250 new (GetGraph()->GetAllocator()) LocationSummary(not_, LocationSummary::kNoCall);
5251 locations->SetInAt(0, Location::RequiresRegister());
5252 locations->SetOut(Location::SameAsFirstInput());
5253 }
5254
VisitNot(HNot * not_)5255 void InstructionCodeGeneratorX86::VisitNot(HNot* not_) {
5256 LocationSummary* locations = not_->GetLocations();
5257 Location in = locations->InAt(0);
5258 Location out = locations->Out();
5259 DCHECK(in.Equals(out));
5260 switch (not_->GetResultType()) {
5261 case DataType::Type::kInt32:
5262 __ notl(out.AsRegister<Register>());
5263 break;
5264
5265 case DataType::Type::kInt64:
5266 __ notl(out.AsRegisterPairLow<Register>());
5267 __ notl(out.AsRegisterPairHigh<Register>());
5268 break;
5269
5270 default:
5271 LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
5272 }
5273 }
5274
VisitBooleanNot(HBooleanNot * bool_not)5275 void LocationsBuilderX86::VisitBooleanNot(HBooleanNot* bool_not) {
5276 LocationSummary* locations =
5277 new (GetGraph()->GetAllocator()) LocationSummary(bool_not, LocationSummary::kNoCall);
5278 locations->SetInAt(0, Location::RequiresRegister());
5279 locations->SetOut(Location::SameAsFirstInput());
5280 }
5281
VisitBooleanNot(HBooleanNot * bool_not)5282 void InstructionCodeGeneratorX86::VisitBooleanNot(HBooleanNot* bool_not) {
5283 LocationSummary* locations = bool_not->GetLocations();
5284 Location in = locations->InAt(0);
5285 Location out = locations->Out();
5286 DCHECK(in.Equals(out));
5287 __ xorl(out.AsRegister<Register>(), Immediate(1));
5288 }
5289
VisitCompare(HCompare * compare)5290 void LocationsBuilderX86::VisitCompare(HCompare* compare) {
5291 LocationSummary* locations =
5292 new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
5293 switch (compare->InputAt(0)->GetType()) {
5294 case DataType::Type::kBool:
5295 case DataType::Type::kUint8:
5296 case DataType::Type::kInt8:
5297 case DataType::Type::kUint16:
5298 case DataType::Type::kInt16:
5299 case DataType::Type::kInt32:
5300 case DataType::Type::kInt64: {
5301 locations->SetInAt(0, Location::RequiresRegister());
5302 locations->SetInAt(1, Location::Any());
5303 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5304 break;
5305 }
5306 case DataType::Type::kFloat32:
5307 case DataType::Type::kFloat64: {
5308 locations->SetInAt(0, Location::RequiresFpuRegister());
5309 if (compare->InputAt(1)->IsX86LoadFromConstantTable()) {
5310 DCHECK(compare->InputAt(1)->IsEmittedAtUseSite());
5311 } else if (compare->InputAt(1)->IsConstant()) {
5312 locations->SetInAt(1, Location::RequiresFpuRegister());
5313 } else {
5314 locations->SetInAt(1, Location::Any());
5315 }
5316 locations->SetOut(Location::RequiresRegister());
5317 break;
5318 }
5319 default:
5320 LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
5321 }
5322 }
5323
VisitCompare(HCompare * compare)5324 void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) {
5325 LocationSummary* locations = compare->GetLocations();
5326 Register out = locations->Out().AsRegister<Register>();
5327 Location left = locations->InAt(0);
5328 Location right = locations->InAt(1);
5329
5330 NearLabel less, greater, done;
5331 Condition less_cond = kLess;
5332
5333 switch (compare->InputAt(0)->GetType()) {
5334 case DataType::Type::kBool:
5335 case DataType::Type::kUint8:
5336 case DataType::Type::kInt8:
5337 case DataType::Type::kUint16:
5338 case DataType::Type::kInt16:
5339 case DataType::Type::kInt32: {
5340 codegen_->GenerateIntCompare(left, right);
5341 break;
5342 }
5343 case DataType::Type::kInt64: {
5344 Register left_low = left.AsRegisterPairLow<Register>();
5345 Register left_high = left.AsRegisterPairHigh<Register>();
5346 int32_t val_low = 0;
5347 int32_t val_high = 0;
5348 bool right_is_const = false;
5349
5350 if (right.IsConstant()) {
5351 DCHECK(right.GetConstant()->IsLongConstant());
5352 right_is_const = true;
5353 int64_t val = right.GetConstant()->AsLongConstant()->GetValue();
5354 val_low = Low32Bits(val);
5355 val_high = High32Bits(val);
5356 }
5357
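      // Compare the high words first with a signed compare; only when they are equal does the
      // unsigned comparison of the low words (hence kBelow as the "less" condition) decide
      // the result.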
5358 if (right.IsRegisterPair()) {
5359 __ cmpl(left_high, right.AsRegisterPairHigh<Register>());
5360 } else if (right.IsDoubleStackSlot()) {
5361 __ cmpl(left_high, Address(ESP, right.GetHighStackIndex(kX86WordSize)));
5362 } else {
5363 DCHECK(right_is_const) << right;
5364 codegen_->Compare32BitValue(left_high, val_high);
5365 }
5366 __ j(kLess, &less); // Signed compare.
5367 __ j(kGreater, &greater); // Signed compare.
5368 if (right.IsRegisterPair()) {
5369 __ cmpl(left_low, right.AsRegisterPairLow<Register>());
5370 } else if (right.IsDoubleStackSlot()) {
5371 __ cmpl(left_low, Address(ESP, right.GetStackIndex()));
5372 } else {
5373 DCHECK(right_is_const) << right;
5374 codegen_->Compare32BitValue(left_low, val_low);
5375 }
5376 less_cond = kBelow; // for CF (unsigned).
5377 break;
5378 }
5379 case DataType::Type::kFloat32: {
5380 GenerateFPCompare(left, right, compare, false);
5381 __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
5382 less_cond = kBelow; // for CF (floats).
5383 break;
5384 }
5385 case DataType::Type::kFloat64: {
5386 GenerateFPCompare(left, right, compare, true);
5387 __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
5388 less_cond = kBelow; // for CF (floats).
5389 break;
5390 }
5391 default:
5392 LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
5393 }
5394
5395 __ movl(out, Immediate(0));
5396 __ j(kEqual, &done);
5397 __ j(less_cond, &less);
5398
5399 __ Bind(&greater);
5400 __ movl(out, Immediate(1));
5401 __ jmp(&done);
5402
5403 __ Bind(&less);
5404 __ movl(out, Immediate(-1));
5405
5406 __ Bind(&done);
5407 }
5408
VisitPhi(HPhi * instruction)5409 void LocationsBuilderX86::VisitPhi(HPhi* instruction) {
5410 LocationSummary* locations =
5411 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5412 for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
5413 locations->SetInAt(i, Location::Any());
5414 }
5415 locations->SetOut(Location::Any());
5416 }
5417
VisitPhi(HPhi * instruction)5418 void InstructionCodeGeneratorX86::VisitPhi([[maybe_unused]] HPhi* instruction) {
5419 LOG(FATAL) << "Unreachable";
5420 }
5421
GenerateMemoryBarrier(MemBarrierKind kind)5422 void CodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) {
5423 /*
5424   * According to the JSR-133 Cookbook, for x86 only StoreLoad/AnyAny barriers need a memory fence.
5425 * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model.
5426 * For those cases, all we need to ensure is that there is a scheduling barrier in place.
5427 */
5428 switch (kind) {
5429 case MemBarrierKind::kAnyAny: {
5430 MemoryFence();
5431 break;
5432 }
5433 case MemBarrierKind::kAnyStore:
5434 case MemBarrierKind::kLoadAny:
5435 case MemBarrierKind::kStoreStore: {
5436 // nop
5437 break;
5438 }
5439 case MemBarrierKind::kNTStoreStore:
5440 // Non-Temporal Store/Store needs an explicit fence.
5441 MemoryFence(/* non-temporal= */ true);
5442 break;
5443 }
5444 }
5445
GetSupportedInvokeStaticOrDirectDispatch(const HInvokeStaticOrDirect::DispatchInfo & desired_dispatch_info,ArtMethod * method)5446 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86::GetSupportedInvokeStaticOrDirectDispatch(
5447 const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
5448 [[maybe_unused]] ArtMethod* method) {
5449 return desired_dispatch_info;
5450 }
5451
GetInvokeExtraParameter(HInvoke * invoke,Register temp)5452 Register CodeGeneratorX86::GetInvokeExtraParameter(HInvoke* invoke, Register temp) {
5453 if (invoke->IsInvokeStaticOrDirect()) {
5454 return GetInvokeStaticOrDirectExtraParameter(invoke->AsInvokeStaticOrDirect(), temp);
5455 }
5456 DCHECK(invoke->IsInvokeInterface());
5457 Location location =
5458 invoke->GetLocations()->InAt(invoke->AsInvokeInterface()->GetSpecialInputIndex());
5459 return location.AsRegister<Register>();
5460 }
5461
GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect * invoke,Register temp)5462 Register CodeGeneratorX86::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
5463 Register temp) {
5464 Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
5465 if (!invoke->GetLocations()->Intrinsified()) {
5466 return location.AsRegister<Register>();
5467 }
5468 // For intrinsics we allow any location, so it may be on the stack.
5469 if (!location.IsRegister()) {
5470 __ movl(temp, Address(ESP, location.GetStackIndex()));
5471 return temp;
5472 }
5473 // For register locations, check if the register was saved. If so, get it from the stack.
5474 // Note: There is a chance that the register was saved but not overwritten, so we could
5475  // save one load. However, since this is just an intrinsic slow path, we prefer this
5476  // simple and more robust approach rather than trying to determine if that's the case.
5477 SlowPathCode* slow_path = GetCurrentSlowPath();
5478 DCHECK(slow_path != nullptr); // For intrinsified invokes the call is emitted on the slow path.
5479 if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
5480 int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
5481 __ movl(temp, Address(ESP, stack_offset));
5482 return temp;
5483 }
5484 return location.AsRegister<Register>();
5485 }
5486
LoadMethod(MethodLoadKind load_kind,Location temp,HInvoke * invoke)5487 void CodeGeneratorX86::LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke) {
5488 switch (load_kind) {
5489 case MethodLoadKind::kBootImageLinkTimePcRelative: {
5490 DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
5491 Register base_reg = GetInvokeExtraParameter(invoke, temp.AsRegister<Register>());
5492 __ leal(temp.AsRegister<Register>(),
5493 Address(base_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
5494 RecordBootImageMethodPatch(invoke);
5495 break;
5496 }
5497 case MethodLoadKind::kBootImageRelRo: {
5498 size_t index = invoke->IsInvokeInterface()
5499 ? invoke->AsInvokeInterface()->GetSpecialInputIndex()
5500 : invoke->AsInvokeStaticOrDirect()->GetSpecialInputIndex();
5501 Register base_reg = GetInvokeExtraParameter(invoke, temp.AsRegister<Register>());
5502 __ movl(temp.AsRegister<Register>(), Address(base_reg, kPlaceholder32BitOffset));
5503 RecordBootImageRelRoPatch(
5504 invoke->InputAt(index)->AsX86ComputeBaseMethodAddress(),
5505 GetBootImageOffset(invoke));
5506 break;
5507 }
5508 case MethodLoadKind::kBssEntry: {
5509 Register base_reg = GetInvokeExtraParameter(invoke, temp.AsRegister<Register>());
5510 __ movl(temp.AsRegister<Register>(), Address(base_reg, kPlaceholder32BitOffset));
5511 RecordMethodBssEntryPatch(invoke);
5512 // No need for memory fence, thanks to the x86 memory model.
5513 break;
5514 }
5515 case MethodLoadKind::kJitDirectAddress: {
5516 __ movl(temp.AsRegister<Register>(),
5517 Immediate(reinterpret_cast32<uint32_t>(invoke->GetResolvedMethod())));
5518 break;
5519 }
5520 case MethodLoadKind::kRuntimeCall: {
5521 // Test situation, don't do anything.
5522 break;
5523 }
5524 default: {
5525 LOG(FATAL) << "Load kind should have already been handled " << load_kind;
5526 UNREACHABLE();
5527 }
5528 }
5529 }
5530
GenerateStaticOrDirectCall(HInvokeStaticOrDirect * invoke,Location temp,SlowPathCode * slow_path)5531 void CodeGeneratorX86::GenerateStaticOrDirectCall(
5532 HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
5533 Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp.
5534 switch (invoke->GetMethodLoadKind()) {
5535 case MethodLoadKind::kStringInit: {
5536 // temp = thread->string_init_entrypoint
5537 uint32_t offset =
5538 GetThreadOffset<kX86PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
5539 __ fs()->movl(temp.AsRegister<Register>(), Address::Absolute(offset));
5540 break;
5541 }
5542 case MethodLoadKind::kRecursive: {
5543 callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex());
5544 break;
5545 }
5546 case MethodLoadKind::kRuntimeCall: {
5547 GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
5548 return; // No code pointer retrieval; the runtime performs the call directly.
5549 }
5550 case MethodLoadKind::kBootImageLinkTimePcRelative:
5551 // For kCallCriticalNative we skip loading the method and do the call directly.
5552 if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
5553 break;
5554 }
5555 FALLTHROUGH_INTENDED;
5556 default: {
5557 LoadMethod(invoke->GetMethodLoadKind(), callee_method, invoke);
5558 }
5559 }
5560
5561 switch (invoke->GetCodePtrLocation()) {
5562 case CodePtrLocation::kCallSelf:
5563 DCHECK(!GetGraph()->HasShouldDeoptimizeFlag());
5564 __ call(GetFrameEntryLabel());
5565 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
5566 break;
5567 case CodePtrLocation::kCallCriticalNative: {
5568 size_t out_frame_size =
5569 PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorX86,
5570 kNativeStackAlignment,
5571 GetCriticalNativeDirectCallFrameSize>(invoke);
5572 if (invoke->GetMethodLoadKind() == MethodLoadKind::kBootImageLinkTimePcRelative) {
5573 DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
5574 Register base_reg = GetInvokeExtraParameter(invoke, temp.AsRegister<Register>());
5575 __ call(Address(base_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
5576 RecordBootImageJniEntrypointPatch(invoke);
5577 } else {
5578 // (callee_method + offset_of_jni_entry_point)()
5579 __ call(Address(callee_method.AsRegister<Register>(),
5580 ArtMethod::EntryPointFromJniOffset(kX86PointerSize).Int32Value()));
5581 }
5582 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
5583 if (out_frame_size == 0u && DataType::IsFloatingPointType(invoke->GetType())) {
5584 // Create space for conversion.
5585 out_frame_size = 8u;
5586 IncreaseFrame(out_frame_size);
5587 }
5588 // Zero-/sign-extend or move the result when needed due to native and managed ABI mismatch.
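      // For float/double returns, the native x86 calling convention delivers the value in x87
      // st(0) while the managed ABI expects it in XMM0, hence the fstps/fstpl through the
      // stack slot reserved above.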
5589 switch (invoke->GetType()) {
5590 case DataType::Type::kBool:
5591 __ movzxb(EAX, AL);
5592 break;
5593 case DataType::Type::kInt8:
5594 __ movsxb(EAX, AL);
5595 break;
5596 case DataType::Type::kUint16:
5597 __ movzxw(EAX, EAX);
5598 break;
5599 case DataType::Type::kInt16:
5600 __ movsxw(EAX, EAX);
5601 break;
5602 case DataType::Type::kFloat32:
5603 __ fstps(Address(ESP, 0));
5604 __ movss(XMM0, Address(ESP, 0));
5605 break;
5606 case DataType::Type::kFloat64:
5607 __ fstpl(Address(ESP, 0));
5608 __ movsd(XMM0, Address(ESP, 0));
5609 break;
5610 case DataType::Type::kInt32:
5611 case DataType::Type::kInt64:
5612 case DataType::Type::kVoid:
5613 break;
5614 default:
5615 DCHECK(false) << invoke->GetType();
5616 break;
5617 }
5618 if (out_frame_size != 0u) {
5619 DecreaseFrame(out_frame_size);
5620 }
5621 break;
5622 }
5623 case CodePtrLocation::kCallArtMethod:
5624 // (callee_method + offset_of_quick_compiled_code)()
5625 __ call(Address(callee_method.AsRegister<Register>(),
5626 ArtMethod::EntryPointFromQuickCompiledCodeOffset(
5627 kX86PointerSize).Int32Value()));
5628 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
5629 break;
5630 }
5631
5632 DCHECK(!IsLeafMethod());
5633 }
5634
5635 void CodeGeneratorX86::GenerateVirtualCall(
5636 HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
5637 Register temp = temp_in.AsRegister<Register>();
5638 uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
5639 invoke->GetVTableIndex(), kX86PointerSize).Uint32Value();
5640
5641 // Use the calling convention instead of the location of the receiver, as
5642 // intrinsics may have put the receiver in a different register. In the intrinsics
5643 // slow path, the arguments have been moved to the right place, so here we are
5644 // guaranteed that the receiver is the first register of the calling convention.
5645 InvokeDexCallingConvention calling_convention;
5646 Register receiver = calling_convention.GetRegisterAt(0);
5647 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
5648 // /* HeapReference<Class> */ temp = receiver->klass_
5649 __ movl(temp, Address(receiver, class_offset));
5650 MaybeRecordImplicitNullCheck(invoke);
5651 // Instead of simply (possibly) unpoisoning `temp` here, we should
5652 // emit a read barrier for the previous class reference load.
5653 // However this is not required in practice, as this is an
5654 // intermediate/temporary reference and because the current
5655 // concurrent copying collector keeps the from-space memory
5656 // intact/accessible until the end of the marking phase (a future
5657 // concurrent copying collector might not preserve this).
5658 __ MaybeUnpoisonHeapReference(temp);
5659
5660 MaybeGenerateInlineCacheCheck(invoke, temp);
5661
5662 // temp = temp->GetMethodAt(method_offset);
5663 __ movl(temp, Address(temp, method_offset));
5664 // call temp->GetEntryPoint();
5665 __ call(Address(
5666 temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value()));
5667 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
5668 }
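// Roughly, the virtual dispatch emitted above (ignoring heap-reference poisoning and the
// optional inline-cache check) is:
//   movl temp, [receiver + class_offset]     ; temp = receiver->klass_
//   movl temp, [temp + vtable_entry_offset]  ; temp = klass->embedded_vtable_[index]
//   call [temp + quick_code_entry_point]     ; invoke the resolved ArtMethod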
5669
5670 void CodeGeneratorX86::RecordBootImageIntrinsicPatch(HX86ComputeBaseMethodAddress* method_address,
5671 uint32_t intrinsic_data) {
5672 boot_image_other_patches_.emplace_back(
5673 method_address, /* target_dex_file= */ nullptr, intrinsic_data);
5674 __ Bind(&boot_image_other_patches_.back().label);
5675 }
5676
5677 void CodeGeneratorX86::RecordBootImageRelRoPatch(HX86ComputeBaseMethodAddress* method_address,
5678 uint32_t boot_image_offset) {
5679 boot_image_other_patches_.emplace_back(
5680 method_address, /* target_dex_file= */ nullptr, boot_image_offset);
5681 __ Bind(&boot_image_other_patches_.back().label);
5682 }
5683
5684 void CodeGeneratorX86::RecordBootImageMethodPatch(HInvoke* invoke) {
5685 size_t index = invoke->IsInvokeInterface()
5686 ? invoke->AsInvokeInterface()->GetSpecialInputIndex()
5687 : invoke->AsInvokeStaticOrDirect()->GetSpecialInputIndex();
5688 HX86ComputeBaseMethodAddress* method_address =
5689 invoke->InputAt(index)->AsX86ComputeBaseMethodAddress();
5690 boot_image_method_patches_.emplace_back(
5691 method_address,
5692 invoke->GetResolvedMethodReference().dex_file,
5693 invoke->GetResolvedMethodReference().index);
5694 __ Bind(&boot_image_method_patches_.back().label);
5695 }
5696
5697 void CodeGeneratorX86::RecordMethodBssEntryPatch(HInvoke* invoke) {
5698 size_t index = invoke->IsInvokeInterface()
5699 ? invoke->AsInvokeInterface()->GetSpecialInputIndex()
5700 : invoke->AsInvokeStaticOrDirect()->GetSpecialInputIndex();
5701 DCHECK(IsSameDexFile(GetGraph()->GetDexFile(), *invoke->GetMethodReference().dex_file) ||
5702 GetCompilerOptions().WithinOatFile(invoke->GetMethodReference().dex_file) ||
5703 ContainsElement(Runtime::Current()->GetClassLinker()->GetBootClassPath(),
5704 invoke->GetMethodReference().dex_file));
5705 HX86ComputeBaseMethodAddress* method_address =
5706 invoke->InputAt(index)->AsX86ComputeBaseMethodAddress();
5707 // Add the patch entry and bind its label at the end of the instruction.
5708 method_bss_entry_patches_.emplace_back(
5709 method_address,
5710 invoke->GetMethodReference().dex_file,
5711 invoke->GetMethodReference().index);
5712 __ Bind(&method_bss_entry_patches_.back().label);
5713 }
5714
5715 void CodeGeneratorX86::RecordBootImageTypePatch(HLoadClass* load_class) {
5716 HX86ComputeBaseMethodAddress* method_address =
5717 load_class->InputAt(0)->AsX86ComputeBaseMethodAddress();
5718 boot_image_type_patches_.emplace_back(
5719 method_address, &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
5720 __ Bind(&boot_image_type_patches_.back().label);
5721 }
5722
5723 void CodeGeneratorX86::RecordAppImageTypePatch(HLoadClass* load_class) {
5724 HX86ComputeBaseMethodAddress* method_address =
5725 load_class->InputAt(0)->AsX86ComputeBaseMethodAddress();
5726 app_image_type_patches_.emplace_back(
5727 method_address, &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
5728 __ Bind(&app_image_type_patches_.back().label);
5729 }
5730
5731 Label* CodeGeneratorX86::NewTypeBssEntryPatch(HLoadClass* load_class) {
5732 HX86ComputeBaseMethodAddress* method_address =
5733 load_class->InputAt(0)->AsX86ComputeBaseMethodAddress();
5734 ArenaDeque<X86PcRelativePatchInfo>* patches = nullptr;
5735 switch (load_class->GetLoadKind()) {
5736 case HLoadClass::LoadKind::kBssEntry:
5737 patches = &type_bss_entry_patches_;
5738 break;
5739 case HLoadClass::LoadKind::kBssEntryPublic:
5740 patches = &public_type_bss_entry_patches_;
5741 break;
5742 case HLoadClass::LoadKind::kBssEntryPackage:
5743 patches = &package_type_bss_entry_patches_;
5744 break;
5745 default:
5746 LOG(FATAL) << "Unexpected load kind: " << load_class->GetLoadKind();
5747 UNREACHABLE();
5748 }
5749 patches->emplace_back(
5750 method_address, &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
5751 return &patches->back().label;
5752 }
5753
5754 void CodeGeneratorX86::RecordBootImageStringPatch(HLoadString* load_string) {
5755 HX86ComputeBaseMethodAddress* method_address =
5756 load_string->InputAt(0)->AsX86ComputeBaseMethodAddress();
5757 boot_image_string_patches_.emplace_back(
5758 method_address, &load_string->GetDexFile(), load_string->GetStringIndex().index_);
5759 __ Bind(&boot_image_string_patches_.back().label);
5760 }
5761
5762 Label* CodeGeneratorX86::NewStringBssEntryPatch(HLoadString* load_string) {
5763 HX86ComputeBaseMethodAddress* method_address =
5764 load_string->InputAt(0)->AsX86ComputeBaseMethodAddress();
5765 string_bss_entry_patches_.emplace_back(
5766 method_address, &load_string->GetDexFile(), load_string->GetStringIndex().index_);
5767 return &string_bss_entry_patches_.back().label;
5768 }
5769
5770 void CodeGeneratorX86::RecordBootImageJniEntrypointPatch(HInvokeStaticOrDirect* invoke) {
5771 HX86ComputeBaseMethodAddress* method_address =
5772 invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5773 boot_image_jni_entrypoint_patches_.emplace_back(
5774 method_address,
5775 invoke->GetResolvedMethodReference().dex_file,
5776 invoke->GetResolvedMethodReference().index);
5777 __ Bind(&boot_image_jni_entrypoint_patches_.back().label);
5778 }
5779
5780 void CodeGeneratorX86::LoadBootImageAddress(Register reg,
5781 uint32_t boot_image_reference,
5782 HInvokeStaticOrDirect* invoke) {
5783 if (GetCompilerOptions().IsBootImage()) {
5784 HX86ComputeBaseMethodAddress* method_address =
5785 invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5786 DCHECK(method_address != nullptr);
5787 Register method_address_reg =
5788 invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>();
5789 __ leal(reg, Address(method_address_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
5790 RecordBootImageIntrinsicPatch(method_address, boot_image_reference);
5791 } else if (GetCompilerOptions().GetCompilePic()) {
5792 HX86ComputeBaseMethodAddress* method_address =
5793 invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5794 DCHECK(method_address != nullptr);
5795 Register method_address_reg =
5796 invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>();
5797 __ movl(reg, Address(method_address_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
5798 RecordBootImageRelRoPatch(method_address, boot_image_reference);
5799 } else {
5800 DCHECK(GetCompilerOptions().IsJitCompiler());
5801 gc::Heap* heap = Runtime::Current()->GetHeap();
5802 DCHECK(!heap->GetBootImageSpaces().empty());
5803 const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference;
5804 __ movl(reg, Immediate(dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(address))));
5805 }
5806 }
5807
5808 void CodeGeneratorX86::LoadIntrinsicDeclaringClass(Register reg, HInvokeStaticOrDirect* invoke) {
5809 DCHECK_NE(invoke->GetIntrinsic(), Intrinsics::kNone);
5810 if (GetCompilerOptions().IsBootImage()) {
5811 // Load the type the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
5812 HX86ComputeBaseMethodAddress* method_address =
5813 invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5814 DCHECK(method_address != nullptr);
5815 Register method_address_reg =
5816 invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>();
5817 __ leal(reg, Address(method_address_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
5818 MethodReference target_method = invoke->GetResolvedMethodReference();
5819 dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
5820 boot_image_type_patches_.emplace_back(method_address, target_method.dex_file, type_idx.index_);
5821 __ Bind(&boot_image_type_patches_.back().label);
5822 } else {
5823 uint32_t boot_image_offset = GetBootImageOffsetOfIntrinsicDeclaringClass(invoke);
5824 LoadBootImageAddress(reg, boot_image_offset, invoke);
5825 }
5826 }
5827
5828 // The label points to the end of the "movl" or another instruction but the literal offset
5829 // for method patch needs to point to the embedded constant which occupies the last 4 bytes.
5830 constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
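// For example (illustrative encoding only): `movl 0x12345678(%esi), %eax` assembles to
// 8B 86 78 56 34 12. The label is bound just past the last byte, so the 32-bit immediate
// that the linker must patch starts at label.Position() - 4, which is the adjustment above.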
5831
5832 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
5833 inline void CodeGeneratorX86::EmitPcRelativeLinkerPatches(
5834 const ArenaDeque<X86PcRelativePatchInfo>& infos,
5835 ArenaVector<linker::LinkerPatch>* linker_patches) {
5836 for (const X86PcRelativePatchInfo& info : infos) {
5837 uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
5838 linker_patches->push_back(Factory(literal_offset,
5839 info.target_dex_file,
5840 GetMethodAddressOffset(info.method_address),
5841 info.offset_or_index));
5842 }
5843 }
5844
5845 template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
5846 linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
5847 const DexFile* target_dex_file,
5848 uint32_t pc_insn_offset,
5849 uint32_t boot_image_offset) {
5850 DCHECK(target_dex_file == nullptr); // Unused for these patches, should be null.
5851 return Factory(literal_offset, pc_insn_offset, boot_image_offset);
5852 }
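// This adapter lets 3-argument LinkerPatch factories (which take no dex file, e.g.
// BootImageRelRoPatch or IntrinsicReferencePatch) be plugged into
// EmitPcRelativeLinkerPatches<>, which always forwards a target dex file; the DCHECK
// verifies that the forwarded dex file is indeed unused (null) for such patches.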
5853
5854 void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
5855 DCHECK(linker_patches->empty());
5856 size_t size =
5857 boot_image_method_patches_.size() +
5858 method_bss_entry_patches_.size() +
5859 boot_image_type_patches_.size() +
5860 app_image_type_patches_.size() +
5861 type_bss_entry_patches_.size() +
5862 public_type_bss_entry_patches_.size() +
5863 package_type_bss_entry_patches_.size() +
5864 boot_image_string_patches_.size() +
5865 string_bss_entry_patches_.size() +
5866 boot_image_jni_entrypoint_patches_.size() +
5867 boot_image_other_patches_.size();
5868 linker_patches->reserve(size);
5869 if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) {
5870 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
5871 boot_image_method_patches_, linker_patches);
5872 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
5873 boot_image_type_patches_, linker_patches);
5874 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
5875 boot_image_string_patches_, linker_patches);
5876 } else {
5877 DCHECK(boot_image_method_patches_.empty());
5878 DCHECK(boot_image_type_patches_.empty());
5879 DCHECK(boot_image_string_patches_.empty());
5880 }
5881 DCHECK_IMPLIES(!GetCompilerOptions().IsAppImage(), app_image_type_patches_.empty());
5882 if (GetCompilerOptions().IsBootImage()) {
5883 EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
5884 boot_image_other_patches_, linker_patches);
5885 } else {
5886 EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::BootImageRelRoPatch>>(
5887 boot_image_other_patches_, linker_patches);
5888 EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeAppImageRelRoPatch>(
5889 app_image_type_patches_, linker_patches);
5890 }
5891 EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
5892 method_bss_entry_patches_, linker_patches);
5893 EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
5894 type_bss_entry_patches_, linker_patches);
5895 EmitPcRelativeLinkerPatches<linker::LinkerPatch::PublicTypeBssEntryPatch>(
5896 public_type_bss_entry_patches_, linker_patches);
5897 EmitPcRelativeLinkerPatches<linker::LinkerPatch::PackageTypeBssEntryPatch>(
5898 package_type_bss_entry_patches_, linker_patches);
5899 EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
5900 string_bss_entry_patches_, linker_patches);
5901 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeJniEntrypointPatch>(
5902 boot_image_jni_entrypoint_patches_, linker_patches);
5903 DCHECK_EQ(size, linker_patches->size());
5904 }
5905
5906 void CodeGeneratorX86::MaybeMarkGCCard(
5907 Register temp, Register card, Register object, Register value, bool emit_null_check) {
5908 NearLabel is_null;
5909 if (emit_null_check) {
5910 __ testl(value, value);
5911 __ j(kEqual, &is_null);
5912 }
5913 MarkGCCard(temp, card, object);
5914 if (emit_null_check) {
5915 __ Bind(&is_null);
5916 }
5917 }
5918
5919 void CodeGeneratorX86::MarkGCCard(Register temp, Register card, Register object) {
5920 // Load the address of the card table into `card`.
5921 __ fs()->movl(card, Address::Absolute(Thread::CardTableOffset<kX86PointerSize>().Int32Value()));
5922 // Calculate the offset (in the card table) of the card corresponding to `object`.
5923 __ movl(temp, object);
5924 __ shrl(temp, Immediate(gc::accounting::CardTable::kCardShift));
5925 // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
5926 // `object`'s card.
5927 //
5928 // Register `card` contains the address of the card table. Note that the card
5929 // table's base is biased during its creation so that it always starts at an
5930 // address whose least-significant byte is equal to `kCardDirty` (see
5931 // art::gc::accounting::CardTable::Create). Therefore the MOVB instruction
5932 // below writes the `kCardDirty` (byte) value into the `object`'s card
5933 // (located at `card + object >> kCardShift`).
5934 //
5935 // This dual use of the value in register `card` (1. to calculate the location
5936 // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
5937 // (no need to explicitly load `kCardDirty` as an immediate value).
5938 __ movb(Address(temp, card, TIMES_1, 0),
5939 X86ManagedRegister::FromCpuRegister(card).AsByteRegister());
5940 }
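// Worked example of the arithmetic above, assuming kCardShift == 10 and kCardDirty == 0x70
// (see gc/accounting/card_table.h for the authoritative values; the numbers here are only
// for intuition):
//   object              = 0x12345678
//   object >> 10        = 0x00048D15                       (card index, held in `temp`)
//   card byte address   = biased_base + 0x00048D15         (`card` holds the biased base)
//   value stored (movb) = low byte of biased_base == 0x70  (kCardDirty, thanks to the bias)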
5941
5942 void CodeGeneratorX86::CheckGCCardIsValid(Register temp, Register card, Register object) {
5943 NearLabel done;
5944 __ j(kEqual, &done);
5945 // Load the address of the card table into `card`.
5946 __ fs()->movl(card, Address::Absolute(Thread::CardTableOffset<kX86PointerSize>().Int32Value()));
5947 // Calculate the offset (in the card table) of the card corresponding to `object`.
5948 __ movl(temp, object);
5949 __ shrl(temp, Immediate(gc::accounting::CardTable::kCardShift));
5950 // assert (!clean || !self->is_gc_marking)
5951 __ cmpb(Address(temp, card, TIMES_1, 0), Immediate(gc::accounting::CardTable::kCardClean));
5952 __ j(kNotEqual, &done);
5953 __ fs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86PointerSize>()), Immediate(0));
5954 __ j(kEqual, &done);
5955 __ int3();
5956 __ Bind(&done);
5957 }
5958
5959 void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) {
5960 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
5961
5962 bool object_field_get_with_read_barrier =
5963 (instruction->GetType() == DataType::Type::kReference) && codegen_->EmitReadBarrier();
5964 LocationSummary* locations =
5965 new (GetGraph()->GetAllocator()) LocationSummary(instruction,
5966 codegen_->EmitReadBarrier()
5967 ? LocationSummary::kCallOnSlowPath
5968 : LocationSummary::kNoCall);
5969 if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
5970 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
5971 }
5972 // receiver_input
5973 locations->SetInAt(0, Location::RequiresRegister());
5974 if (DataType::IsFloatingPointType(instruction->GetType())) {
5975 locations->SetOut(Location::RequiresFpuRegister());
5976 } else {
5977 // The output overlaps in case of long: we don't want the low move
5978 // to overwrite the object's location. Likewise, in the case of
5979 // an object field get with read barriers enabled, we do not want
5980 // the move to overwrite the object's location, as we need it to emit
5981 // the read barrier.
5982 locations->SetOut(
5983 Location::RequiresRegister(),
5984 (object_field_get_with_read_barrier || instruction->GetType() == DataType::Type::kInt64)
5985 ? Location::kOutputOverlap
5986 : Location::kNoOutputOverlap);
5987 }
5988
5989 if (field_info.IsVolatile() && (field_info.GetFieldType() == DataType::Type::kInt64)) {
5990 // Long values can be loaded atomically into an XMM using movsd.
5991 // So we use an XMM register as a temp to achieve atomicity (first
5992 // load the temp into the XMM and then copy the XMM into the
5993 // output, 32 bits at a time).
5994 locations->AddTemp(Location::RequiresFpuRegister());
5995 }
5996 }
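// A sketch of the volatile 64-bit load that LoadFromMemoryNoBarrier is expected to emit
// with this XMM temp (illustrative only; the helper owns the exact sequence):
//   movsd xmm_temp, [base + offset]   ; one atomic 64-bit load
//   movd  out_lo, xmm_temp            ; low 32 bits
//   psrlq xmm_temp, 32
//   movd  out_hi, xmm_temp            ; high 32 bits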
5997
5998 void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction,
5999 const FieldInfo& field_info) {
6000 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
6001
6002 LocationSummary* locations = instruction->GetLocations();
6003 Location base_loc = locations->InAt(0);
6004 Register base = base_loc.AsRegister<Register>();
6005 Location out = locations->Out();
6006 bool is_volatile = field_info.IsVolatile();
6007 DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
6008 DataType::Type load_type = instruction->GetType();
6009 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
6010
6011 if (load_type == DataType::Type::kReference) {
6012 // /* HeapReference<Object> */ out = *(base + offset)
6013 if (codegen_->EmitBakerReadBarrier()) {
6014 // Note that a potential implicit null check is handled in this
6015 // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call.
6016 codegen_->GenerateFieldLoadWithBakerReadBarrier(
6017 instruction, out, base, offset, /* needs_null_check= */ true);
6018 if (is_volatile) {
6019 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
6020 }
6021 } else {
6022 __ movl(out.AsRegister<Register>(), Address(base, offset));
6023 codegen_->MaybeRecordImplicitNullCheck(instruction);
6024 if (is_volatile) {
6025 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
6026 }
6027 // If read barriers are enabled, emit read barriers other than
6028 // Baker's using a slow path (and also unpoison the loaded
6029 // reference, if heap poisoning is enabled).
6030 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
6031 }
6032 } else {
6033 Address src(base, offset);
6034 XmmRegister temp = (load_type == DataType::Type::kInt64 && is_volatile)
6035 ? locations->GetTemp(0).AsFpuRegister<XmmRegister>()
6036 : kNoXmmRegister;
6037 codegen_->LoadFromMemoryNoBarrier(load_type, out, src, instruction, temp, is_volatile);
6038 if (is_volatile) {
6039 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
6040 }
6041 }
6042 }
6043
6044 void LocationsBuilderX86::HandleFieldSet(HInstruction* instruction,
6045 const FieldInfo& field_info,
6046 WriteBarrierKind write_barrier_kind) {
6047 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
6048
6049 LocationSummary* locations =
6050 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
6051 locations->SetInAt(0, Location::RequiresRegister());
6052 bool is_volatile = field_info.IsVolatile();
6053 DataType::Type field_type = field_info.GetFieldType();
6054 bool is_byte_type = DataType::Size(field_type) == 1u;
6055
6056 // The register allocator does not support multiple
6057 // inputs that die at entry with one in a specific register.
6058 if (is_byte_type) {
6059 // Ensure the value is in a byte register.
6060 locations->SetInAt(1, Location::RegisterLocation(EAX));
6061 } else if (DataType::IsFloatingPointType(field_type)) {
6062 if (is_volatile && field_type == DataType::Type::kFloat64) {
6063 // In order to satisfy the semantics of volatile, this must be a single instruction store.
6064 locations->SetInAt(1, Location::RequiresFpuRegister());
6065 } else {
6066 locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
6067 }
6068 } else if (is_volatile && field_type == DataType::Type::kInt64) {
6069 // In order to satisfy the semantics of volatile, this must be a single instruction store.
6070 locations->SetInAt(1, Location::RequiresRegister());
6071
6072 // A 64-bit value can be atomically written to an address with movsd and an XMM register.
6073 // We need two XMM registers because there's no easier way to (bit) copy a register pair
6074 // into a single XMM register (we copy each pair part into the XMMs and then interleave them).
6075 // NB: We could make the register allocator understand fp_reg <-> core_reg moves but given the
6076 // isolated cases when we need this it isn't worth adding the extra complexity.
6077 locations->AddTemp(Location::RequiresFpuRegister());
6078 locations->AddTemp(Location::RequiresFpuRegister());
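    // Sketch of the store sequence the code generator's HandleFieldSet emits for this case:
    //   movd      xmm_temp1, value_lo      ; xmm_temp1[31:0] = low half
    //   movd      xmm_temp2, value_hi      ; xmm_temp2[31:0] = high half
    //   punpckldq xmm_temp1, xmm_temp2     ; xmm_temp1[63:0] = {low, high}
    //   movsd     [field_addr], xmm_temp1  ; one atomic 64-bit store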
6079 } else {
6080 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6081
6082 bool needs_write_barrier =
6083 codegen_->StoreNeedsWriteBarrier(field_type, instruction->InputAt(1), write_barrier_kind);
6084 bool check_gc_card =
6085 codegen_->ShouldCheckGCCard(field_type, instruction->InputAt(1), write_barrier_kind);
6086
6087 if (needs_write_barrier || check_gc_card) {
6088 locations->AddTemp(Location::RequiresRegister());
6089 // Ensure the card is in a byte register.
6090 locations->AddTemp(Location::RegisterLocation(ECX));
6091 } else if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
6092 locations->AddTemp(Location::RequiresRegister());
6093 }
6094 }
6095 }
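// Only EAX, EBX, ECX and EDX have addressable low-byte sub-registers (AL/BL/CL/DL) on
// 32-bit x86, which is why byte-sized values are pinned to EAX above and the card-marking
// temporary to ECX.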
6096
6097 void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction,
6098 uint32_t value_index,
6099 DataType::Type field_type,
6100 Address field_addr,
6101 Register base,
6102 bool is_volatile,
6103 bool value_can_be_null,
6104 WriteBarrierKind write_barrier_kind) {
6105 LocationSummary* locations = instruction->GetLocations();
6106 Location value = locations->InAt(value_index);
6107 bool needs_write_barrier =
6108 codegen_->StoreNeedsWriteBarrier(field_type, instruction->InputAt(1), write_barrier_kind);
6109
6110 if (is_volatile) {
6111 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
6112 }
6113
6114 bool maybe_record_implicit_null_check_done = false;
6115
6116 switch (field_type) {
6117 case DataType::Type::kBool:
6118 case DataType::Type::kUint8:
6119 case DataType::Type::kInt8: {
6120 if (value.IsConstant()) {
6121 __ movb(field_addr, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
6122 } else {
6123 __ movb(field_addr, value.AsRegister<ByteRegister>());
6124 }
6125 break;
6126 }
6127
6128 case DataType::Type::kUint16:
6129 case DataType::Type::kInt16: {
6130 if (value.IsConstant()) {
6131 __ movw(field_addr, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
6132 } else {
6133 __ movw(field_addr, value.AsRegister<Register>());
6134 }
6135 break;
6136 }
6137
6138 case DataType::Type::kInt32:
6139 case DataType::Type::kReference: {
6140 if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
6141 if (value.IsConstant()) {
6142 DCHECK(value.GetConstant()->IsNullConstant())
6143 << "constant value " << CodeGenerator::GetInt32ValueOf(value.GetConstant())
6144 << " is not null. Instruction " << *instruction;
6145 // No need to poison null, just do a movl.
6146 __ movl(field_addr, Immediate(0));
6147 } else {
6148 Register temp = locations->GetTemp(0).AsRegister<Register>();
6149 __ movl(temp, value.AsRegister<Register>());
6150 __ PoisonHeapReference(temp);
6151 __ movl(field_addr, temp);
6152 }
6153 } else if (value.IsConstant()) {
6154 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
6155 __ movl(field_addr, Immediate(v));
6156 } else {
6157 DCHECK(value.IsRegister()) << value;
6158 __ movl(field_addr, value.AsRegister<Register>());
6159 }
6160 break;
6161 }
6162
6163 case DataType::Type::kInt64: {
6164 if (is_volatile) {
6165 XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
6166 XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
6167 __ movd(temp1, value.AsRegisterPairLow<Register>());
6168 __ movd(temp2, value.AsRegisterPairHigh<Register>());
6169 __ punpckldq(temp1, temp2);
6170 __ movsd(field_addr, temp1);
6171 codegen_->MaybeRecordImplicitNullCheck(instruction);
6172 } else if (value.IsConstant()) {
6173 int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
6174 __ movl(field_addr, Immediate(Low32Bits(v)));
6175 codegen_->MaybeRecordImplicitNullCheck(instruction);
6176 __ movl(Address::displace(field_addr, kX86WordSize), Immediate(High32Bits(v)));
6177 } else {
6178 __ movl(field_addr, value.AsRegisterPairLow<Register>());
6179 codegen_->MaybeRecordImplicitNullCheck(instruction);
6180 __ movl(Address::displace(field_addr, kX86WordSize), value.AsRegisterPairHigh<Register>());
6181 }
6182 maybe_record_implicit_null_check_done = true;
6183 break;
6184 }
6185
6186 case DataType::Type::kFloat32: {
6187 if (value.IsConstant()) {
6188 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
6189 __ movl(field_addr, Immediate(v));
6190 } else {
6191 __ movss(field_addr, value.AsFpuRegister<XmmRegister>());
6192 }
6193 break;
6194 }
6195
6196 case DataType::Type::kFloat64: {
6197 if (value.IsConstant()) {
6198 DCHECK(!is_volatile);
6199 int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
6200 __ movl(field_addr, Immediate(Low32Bits(v)));
6201 codegen_->MaybeRecordImplicitNullCheck(instruction);
6202 __ movl(Address::displace(field_addr, kX86WordSize), Immediate(High32Bits(v)));
6203 maybe_record_implicit_null_check_done = true;
6204 } else {
6205 __ movsd(field_addr, value.AsFpuRegister<XmmRegister>());
6206 }
6207 break;
6208 }
6209
6210 case DataType::Type::kUint32:
6211 case DataType::Type::kUint64:
6212 case DataType::Type::kVoid:
6213 LOG(FATAL) << "Unreachable type " << field_type;
6214 UNREACHABLE();
6215 }
6216
6217 if (!maybe_record_implicit_null_check_done) {
6218 codegen_->MaybeRecordImplicitNullCheck(instruction);
6219 }
6220
6221 if (needs_write_barrier) {
6222 Register temp = locations->GetTemp(0).AsRegister<Register>();
6223 Register card = locations->GetTemp(1).AsRegister<Register>();
6224 if (value.IsConstant()) {
6225 DCHECK(value.GetConstant()->IsNullConstant())
6226 << "constant value " << CodeGenerator::GetInt32ValueOf(value.GetConstant())
6227 << " is not null. Instruction: " << *instruction;
6228 if (write_barrier_kind == WriteBarrierKind::kEmitBeingReliedOn) {
6229 codegen_->MarkGCCard(temp, card, base);
6230 }
6231 } else {
6232 codegen_->MaybeMarkGCCard(
6233 temp,
6234 card,
6235 base,
6236 value.AsRegister<Register>(),
6237 value_can_be_null && write_barrier_kind == WriteBarrierKind::kEmitNotBeingReliedOn);
6238 }
6239 } else if (codegen_->ShouldCheckGCCard(field_type, instruction->InputAt(1), write_barrier_kind)) {
6240 if (value.IsConstant()) {
6241 // If we are storing a constant for a reference, we are in the case where we are storing
6242 // null but we cannot skip it as this write barrier is being relied on by coalesced write
6243 // barriers.
6244 DCHECK(value.GetConstant()->IsNullConstant())
6245 << "constant value " << CodeGenerator::GetInt32ValueOf(value.GetConstant())
6246 << " is not null. Instruction: " << *instruction;
6247 // No need to check the dirty bit as this value is null.
6248 } else {
6249 Register temp = locations->GetTemp(0).AsRegister<Register>();
6250 Register card = locations->GetTemp(1).AsRegister<Register>();
6251 codegen_->CheckGCCardIsValid(temp, card, base);
6252 }
6253 }
6254
6255 if (is_volatile) {
6256 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
6257 }
6258 }
6259
6260 void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction,
6261 const FieldInfo& field_info,
6262 bool value_can_be_null,
6263 WriteBarrierKind write_barrier_kind) {
6264 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
6265
6266 LocationSummary* locations = instruction->GetLocations();
6267 Register base = locations->InAt(0).AsRegister<Register>();
6268 bool is_volatile = field_info.IsVolatile();
6269 DataType::Type field_type = field_info.GetFieldType();
6270 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
6271 Address field_addr(base, offset);
6272
6273 HandleFieldSet(instruction,
6274 /* value_index= */ 1,
6275 field_type,
6276 field_addr,
6277 base,
6278 is_volatile,
6279 value_can_be_null,
6280 write_barrier_kind);
6281 }
6282
6283 void LocationsBuilderX86::VisitStaticFieldGet(HStaticFieldGet* instruction) {
6284 HandleFieldGet(instruction, instruction->GetFieldInfo());
6285 }
6286
6287 void InstructionCodeGeneratorX86::VisitStaticFieldGet(HStaticFieldGet* instruction) {
6288 HandleFieldGet(instruction, instruction->GetFieldInfo());
6289 }
6290
6291 void LocationsBuilderX86::VisitStaticFieldSet(HStaticFieldSet* instruction) {
6292 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind());
6293 }
6294
6295 void InstructionCodeGeneratorX86::VisitStaticFieldSet(HStaticFieldSet* instruction) {
6296 HandleFieldSet(instruction,
6297 instruction->GetFieldInfo(),
6298 instruction->GetValueCanBeNull(),
6299 instruction->GetWriteBarrierKind());
6300 }
6301
6302 void LocationsBuilderX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
6303 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind());
6304 }
6305
6306 void InstructionCodeGeneratorX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
6307 HandleFieldSet(instruction,
6308 instruction->GetFieldInfo(),
6309 instruction->GetValueCanBeNull(),
6310 instruction->GetWriteBarrierKind());
6311 }
6312
6313 void LocationsBuilderX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
6314 HandleFieldGet(instruction, instruction->GetFieldInfo());
6315 }
6316
6317 void InstructionCodeGeneratorX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
6318 HandleFieldGet(instruction, instruction->GetFieldInfo());
6319 }
6320
6321 void LocationsBuilderX86::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
6322 codegen_->CreateStringBuilderAppendLocations(instruction, Location::RegisterLocation(EAX));
6323 }
6324
6325 void InstructionCodeGeneratorX86::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
6326 __ movl(EAX, Immediate(instruction->GetFormat()->GetValue()));
6327 codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc());
6328 }
6329
6330 void LocationsBuilderX86::VisitUnresolvedInstanceFieldGet(
6331 HUnresolvedInstanceFieldGet* instruction) {
6332 FieldAccessCallingConventionX86 calling_convention;
6333 codegen_->CreateUnresolvedFieldLocationSummary(
6334 instruction, instruction->GetFieldType(), calling_convention);
6335 }
6336
6337 void InstructionCodeGeneratorX86::VisitUnresolvedInstanceFieldGet(
6338 HUnresolvedInstanceFieldGet* instruction) {
6339 FieldAccessCallingConventionX86 calling_convention;
6340 codegen_->GenerateUnresolvedFieldAccess(instruction,
6341 instruction->GetFieldType(),
6342 instruction->GetFieldIndex(),
6343 instruction->GetDexPc(),
6344 calling_convention);
6345 }
6346
6347 void LocationsBuilderX86::VisitUnresolvedInstanceFieldSet(
6348 HUnresolvedInstanceFieldSet* instruction) {
6349 FieldAccessCallingConventionX86 calling_convention;
6350 codegen_->CreateUnresolvedFieldLocationSummary(
6351 instruction, instruction->GetFieldType(), calling_convention);
6352 }
6353
6354 void InstructionCodeGeneratorX86::VisitUnresolvedInstanceFieldSet(
6355 HUnresolvedInstanceFieldSet* instruction) {
6356 FieldAccessCallingConventionX86 calling_convention;
6357 codegen_->GenerateUnresolvedFieldAccess(instruction,
6358 instruction->GetFieldType(),
6359 instruction->GetFieldIndex(),
6360 instruction->GetDexPc(),
6361 calling_convention);
6362 }
6363
6364 void LocationsBuilderX86::VisitUnresolvedStaticFieldGet(
6365 HUnresolvedStaticFieldGet* instruction) {
6366 FieldAccessCallingConventionX86 calling_convention;
6367 codegen_->CreateUnresolvedFieldLocationSummary(
6368 instruction, instruction->GetFieldType(), calling_convention);
6369 }
6370
6371 void InstructionCodeGeneratorX86::VisitUnresolvedStaticFieldGet(
6372 HUnresolvedStaticFieldGet* instruction) {
6373 FieldAccessCallingConventionX86 calling_convention;
6374 codegen_->GenerateUnresolvedFieldAccess(instruction,
6375 instruction->GetFieldType(),
6376 instruction->GetFieldIndex(),
6377 instruction->GetDexPc(),
6378 calling_convention);
6379 }
6380
6381 void LocationsBuilderX86::VisitUnresolvedStaticFieldSet(
6382 HUnresolvedStaticFieldSet* instruction) {
6383 FieldAccessCallingConventionX86 calling_convention;
6384 codegen_->CreateUnresolvedFieldLocationSummary(
6385 instruction, instruction->GetFieldType(), calling_convention);
6386 }
6387
6388 void InstructionCodeGeneratorX86::VisitUnresolvedStaticFieldSet(
6389 HUnresolvedStaticFieldSet* instruction) {
6390 FieldAccessCallingConventionX86 calling_convention;
6391 codegen_->GenerateUnresolvedFieldAccess(instruction,
6392 instruction->GetFieldType(),
6393 instruction->GetFieldIndex(),
6394 instruction->GetDexPc(),
6395 calling_convention);
6396 }
6397
6398 void LocationsBuilderX86::VisitNullCheck(HNullCheck* instruction) {
6399 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
6400 Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks()
6401 ? Location::RequiresRegister()
6402 : Location::Any();
6403 locations->SetInAt(0, loc);
6404 }
6405
6406 void CodeGeneratorX86::GenerateImplicitNullCheck(HNullCheck* instruction) {
6407 if (CanMoveNullCheckToUser(instruction)) {
6408 return;
6409 }
6410 LocationSummary* locations = instruction->GetLocations();
6411 Location obj = locations->InAt(0);
6412
6413 __ testl(EAX, Address(obj.AsRegister<Register>(), 0));
6414 RecordPcInfo(instruction, instruction->GetDexPc());
6415 }
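// The testl above is a load-only probe: it reads one word at offset 0 of `obj` and leaves
// EAX unchanged. If `obj` is null, the access faults and the runtime's SIGSEGV fault
// handler is expected to map the PC recorded by RecordPcInfo back to this null check and
// throw a NullPointerException. When CanMoveNullCheckToUser() succeeds, a later
// instruction's own memory access acts as the probe and nothing is emitted here.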
6416
6417 void CodeGeneratorX86::GenerateExplicitNullCheck(HNullCheck* instruction) {
6418 SlowPathCode* slow_path = new (GetScopedAllocator()) NullCheckSlowPathX86(instruction);
6419 AddSlowPath(slow_path);
6420
6421 LocationSummary* locations = instruction->GetLocations();
6422 Location obj = locations->InAt(0);
6423
6424 if (obj.IsRegister()) {
6425 __ testl(obj.AsRegister<Register>(), obj.AsRegister<Register>());
6426 } else if (obj.IsStackSlot()) {
6427 __ cmpl(Address(ESP, obj.GetStackIndex()), Immediate(0));
6428 } else {
6429 DCHECK(obj.IsConstant()) << obj;
6430 DCHECK(obj.GetConstant()->IsNullConstant());
6431 __ jmp(slow_path->GetEntryLabel());
6432 return;
6433 }
6434 __ j(kEqual, slow_path->GetEntryLabel());
6435 }
6436
6437 void InstructionCodeGeneratorX86::VisitNullCheck(HNullCheck* instruction) {
6438 codegen_->GenerateNullCheck(instruction);
6439 }
6440
6441 void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) {
6442 bool object_array_get_with_read_barrier =
6443 (instruction->GetType() == DataType::Type::kReference) && codegen_->EmitReadBarrier();
6444 LocationSummary* locations =
6445 new (GetGraph()->GetAllocator()) LocationSummary(instruction,
6446 object_array_get_with_read_barrier
6447 ? LocationSummary::kCallOnSlowPath
6448 : LocationSummary::kNoCall);
6449 if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
6450 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
6451 }
6452 locations->SetInAt(0, Location::RequiresRegister());
6453 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6454 if (DataType::IsFloatingPointType(instruction->GetType())) {
6455 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
6456 } else {
6457 // The output overlaps in case of long: we don't want the low move
6458 // to overwrite the array's location. Likewise, in the case of an
6459 // object array get with read barriers enabled, we do not want the
6460 // move to overwrite the array's location, as we need it to emit
6461 // the read barrier.
6462 locations->SetOut(
6463 Location::RequiresRegister(),
6464 (instruction->GetType() == DataType::Type::kInt64 || object_array_get_with_read_barrier)
6465 ? Location::kOutputOverlap
6466 : Location::kNoOutputOverlap);
6467 }
6468 }
6469
6470 void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
6471 LocationSummary* locations = instruction->GetLocations();
6472 Location obj_loc = locations->InAt(0);
6473 Register obj = obj_loc.AsRegister<Register>();
6474 Location index = locations->InAt(1);
6475 Location out_loc = locations->Out();
6476 uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
6477
6478 DataType::Type type = instruction->GetType();
6479 if (type == DataType::Type::kReference) {
6480 static_assert(
6481 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
6482 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
6483 // /* HeapReference<Object> */ out =
6484 // *(obj + data_offset + index * sizeof(HeapReference<Object>))
6485 if (codegen_->EmitBakerReadBarrier()) {
6486 // Note that a potential implicit null check is handled in this
6487 // CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call.
6488 codegen_->GenerateArrayLoadWithBakerReadBarrier(
6489 instruction, out_loc, obj, data_offset, index, /* needs_null_check= */ true);
6490 } else {
6491 Register out = out_loc.AsRegister<Register>();
6492 __ movl(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset));
6493 codegen_->MaybeRecordImplicitNullCheck(instruction);
6494 // If read barriers are enabled, emit read barriers other than
6495 // Baker's using a slow path (and also unpoison the loaded
6496 // reference, if heap poisoning is enabled).
6497 if (index.IsConstant()) {
6498 uint32_t offset =
6499 (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
6500 codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
6501 } else {
6502 codegen_->MaybeGenerateReadBarrierSlow(
6503 instruction, out_loc, out_loc, obj_loc, data_offset, index);
6504 }
6505 }
6506 } else if (type == DataType::Type::kUint16
6507 && mirror::kUseStringCompression
6508 && instruction->IsStringCharAt()) {
6509 // Branch to the compressed or uncompressed load depending on the string's compression flag.
6510 Register out = out_loc.AsRegister<Register>();
6511 uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
6512 NearLabel done, not_compressed;
6513 __ testb(Address(obj, count_offset), Immediate(1));
6514 codegen_->MaybeRecordImplicitNullCheck(instruction);
6515 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
6516 "Expecting 0=compressed, 1=uncompressed");
6517 __ j(kNotZero, &not_compressed);
6518 __ movzxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset));
6519 __ jmp(&done);
6520 __ Bind(&not_compressed);
6521 __ movzxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset));
6522 __ Bind(&done);
6523 } else {
6524 ScaleFactor scale = CodeGenerator::ScaleFactorForType(type);
6525 Address src = CodeGeneratorX86::ArrayAddress(obj, index, scale, data_offset);
6526 codegen_->LoadFromMemoryNoBarrier(type, out_loc, src, instruction);
6527 }
6528 }
6529
6530 void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) {
6531 DataType::Type value_type = instruction->GetComponentType();
6532
6533 WriteBarrierKind write_barrier_kind = instruction->GetWriteBarrierKind();
6534 bool needs_write_barrier =
6535 codegen_->StoreNeedsWriteBarrier(value_type, instruction->GetValue(), write_barrier_kind);
6536 bool check_gc_card =
6537 codegen_->ShouldCheckGCCard(value_type, instruction->GetValue(), write_barrier_kind);
6538 bool needs_type_check = instruction->NeedsTypeCheck();
6539
6540 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6541 instruction,
6542 needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
6543
6544 bool is_byte_type = DataType::Size(value_type) == 1u;
6545 // We need the inputs to be different from the output in case of a long operation.
6546 // In case of a byte operation, the register allocator does not support multiple
6547 // inputs that die at entry with one in a specific register.
6548 locations->SetInAt(0, Location::RequiresRegister());
6549 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6550 if (is_byte_type) {
6551 // Ensure the value is in a byte register.
6552 locations->SetInAt(2, Location::ByteRegisterOrConstant(EAX, instruction->InputAt(2)));
6553 } else if (DataType::IsFloatingPointType(value_type)) {
6554 locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
6555 } else {
6556 locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
6557 }
6558 if (needs_write_barrier || check_gc_card) {
6559 // Used by reference poisoning, type checking, emitting, or checking a write barrier.
6560 locations->AddTemp(Location::RequiresRegister());
6561 // Only used when emitting or checking a write barrier. Ensure the card is in a byte register.
6562 locations->AddTemp(Location::RegisterLocation(ECX));
6563 } else if ((kPoisonHeapReferences && value_type == DataType::Type::kReference) ||
6564 instruction->NeedsTypeCheck()) {
6565 locations->AddTemp(Location::RequiresRegister());
6566 }
6567 }
6568
6569 void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
6570 LocationSummary* locations = instruction->GetLocations();
6571 Location array_loc = locations->InAt(0);
6572 Register array = array_loc.AsRegister<Register>();
6573 Location index = locations->InAt(1);
6574 Location value = locations->InAt(2);
6575 DataType::Type value_type = instruction->GetComponentType();
6576 bool needs_type_check = instruction->NeedsTypeCheck();
6577 WriteBarrierKind write_barrier_kind = instruction->GetWriteBarrierKind();
6578 bool needs_write_barrier =
6579 codegen_->StoreNeedsWriteBarrier(value_type, instruction->GetValue(), write_barrier_kind);
6580
6581 switch (value_type) {
6582 case DataType::Type::kBool:
6583 case DataType::Type::kUint8:
6584 case DataType::Type::kInt8: {
6585 uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
6586 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_1, offset);
6587 if (value.IsRegister()) {
6588 __ movb(address, value.AsRegister<ByteRegister>());
6589 } else {
6590 __ movb(address, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
6591 }
6592 codegen_->MaybeRecordImplicitNullCheck(instruction);
6593 break;
6594 }
6595
6596 case DataType::Type::kUint16:
6597 case DataType::Type::kInt16: {
6598 uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
6599 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_2, offset);
6600 if (value.IsRegister()) {
6601 __ movw(address, value.AsRegister<Register>());
6602 } else {
6603 __ movw(address, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
6604 }
6605 codegen_->MaybeRecordImplicitNullCheck(instruction);
6606 break;
6607 }
6608
6609 case DataType::Type::kReference: {
6610 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
6611 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
6612
6613 if (!value.IsRegister()) {
6614 // Just setting null.
6615 DCHECK(instruction->InputAt(2)->IsNullConstant());
6616 DCHECK(value.IsConstant()) << value;
6617 __ movl(address, Immediate(0));
6618 codegen_->MaybeRecordImplicitNullCheck(instruction);
6619 if (write_barrier_kind == WriteBarrierKind::kEmitBeingReliedOn) {
6620 // We need to set a write barrier here even though we are writing null, since this write
6621 // barrier is being relied on.
6622 DCHECK(needs_write_barrier);
6623 Register temp = locations->GetTemp(0).AsRegister<Register>();
6624 Register card = locations->GetTemp(1).AsRegister<Register>();
6625 codegen_->MarkGCCard(temp, card, array);
6626 }
6627 DCHECK(!needs_type_check);
6628 break;
6629 }
6630
6631 Register register_value = value.AsRegister<Register>();
6632 const bool can_value_be_null = instruction->GetValueCanBeNull();
6633 // The WriteBarrierKind::kEmitNotBeingReliedOn case is able to skip the write barrier when its
6634 // value is null (without an extra CompareAndBranchIfZero since we already checked if the
6635 // value is null for the type check).
6636 const bool skip_marking_gc_card =
6637 can_value_be_null && write_barrier_kind == WriteBarrierKind::kEmitNotBeingReliedOn;
6638 NearLabel do_store;
6639 NearLabel skip_writing_card;
6640 if (can_value_be_null) {
6641 __ testl(register_value, register_value);
6642 if (skip_marking_gc_card) {
6643 __ j(kEqual, &skip_writing_card);
6644 } else {
6645 __ j(kEqual, &do_store);
6646 }
6647 }
6648
6649 SlowPathCode* slow_path = nullptr;
6650 if (needs_type_check) {
6651 slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathX86(instruction);
6652 codegen_->AddSlowPath(slow_path);
6653
6654 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
6655 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
6656 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
6657
6658 // Note that when Baker read barriers are enabled, the type
6659 // checks are performed without read barriers. This is fine,
6660 // even in the case where a class object is in the from-space
6661 // after the flip, as a comparison involving such a type would
6662 // not produce a false positive; it may of course produce a
6663 // false negative, in which case we would take the ArraySet
6664 // slow path.
6665
6666 Register temp = locations->GetTemp(0).AsRegister<Register>();
6667 // /* HeapReference<Class> */ temp = array->klass_
6668 __ movl(temp, Address(array, class_offset));
6669 codegen_->MaybeRecordImplicitNullCheck(instruction);
6670 __ MaybeUnpoisonHeapReference(temp);
6671
6672 // /* HeapReference<Class> */ temp = temp->component_type_
6673 __ movl(temp, Address(temp, component_offset));
6674 // If heap poisoning is enabled, no need to unpoison `temp`
6675 // nor the object reference in `register_value->klass`, as
6676 // we are comparing two poisoned references.
6677 __ cmpl(temp, Address(register_value, class_offset));
6678
6679 if (instruction->StaticTypeOfArrayIsObjectArray()) {
6680 NearLabel do_put;
6681 __ j(kEqual, &do_put);
6682 // If heap poisoning is enabled, the `temp` reference has
6683 // not been unpoisoned yet; unpoison it now.
6684 __ MaybeUnpoisonHeapReference(temp);
6685
6686 // If heap poisoning is enabled, no need to unpoison the
6687 // heap reference loaded below, as it is only used for a
6688 // comparison with null.
6689 __ cmpl(Address(temp, super_offset), Immediate(0));
6690 __ j(kNotEqual, slow_path->GetEntryLabel());
6691 __ Bind(&do_put);
6692 } else {
6693 __ j(kNotEqual, slow_path->GetEntryLabel());
6694 }
6695 }
6696
6697 if (can_value_be_null && !skip_marking_gc_card) {
6698 DCHECK(do_store.IsLinked());
6699 __ Bind(&do_store);
6700 }
6701
6702 if (needs_write_barrier) {
6703 Register temp = locations->GetTemp(0).AsRegister<Register>();
6704 Register card = locations->GetTemp(1).AsRegister<Register>();
6705 codegen_->MarkGCCard(temp, card, array);
6706 } else if (codegen_->ShouldCheckGCCard(
6707 value_type, instruction->GetValue(), write_barrier_kind)) {
6708 Register temp = locations->GetTemp(0).AsRegister<Register>();
6709 Register card = locations->GetTemp(1).AsRegister<Register>();
6710 codegen_->CheckGCCardIsValid(temp, card, array);
6711 }
6712
6713 if (skip_marking_gc_card) {
6714 // Note that we don't check that the GC card is valid as it can be correctly clean.
6715 DCHECK(skip_writing_card.IsLinked());
6716 __ Bind(&skip_writing_card);
6717 }
6718
6719 Register source = register_value;
6720 if (kPoisonHeapReferences) {
6721 Register temp = locations->GetTemp(0).AsRegister<Register>();
6722 __ movl(temp, register_value);
6723 __ PoisonHeapReference(temp);
6724 source = temp;
6725 }
6726
6727 __ movl(address, source);
6728
6729 if (can_value_be_null || !needs_type_check) {
6730 codegen_->MaybeRecordImplicitNullCheck(instruction);
6731 }
6732
6733 if (slow_path != nullptr) {
6734 __ Bind(slow_path->GetExitLabel());
6735 }
6736
6737 break;
6738 }
6739
6740 case DataType::Type::kInt32: {
6741 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
6742 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
6743 if (value.IsRegister()) {
6744 __ movl(address, value.AsRegister<Register>());
6745 } else {
6746 DCHECK(value.IsConstant()) << value;
6747 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
6748 __ movl(address, Immediate(v));
6749 }
6750 codegen_->MaybeRecordImplicitNullCheck(instruction);
6751 break;
6752 }
6753
6754 case DataType::Type::kInt64: {
6755 uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
6756 if (value.IsRegisterPair()) {
6757 __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset),
6758 value.AsRegisterPairLow<Register>());
6759 codegen_->MaybeRecordImplicitNullCheck(instruction);
6760 __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset + kX86WordSize),
6761 value.AsRegisterPairHigh<Register>());
6762 } else {
6763 DCHECK(value.IsConstant());
6764 int64_t val = value.GetConstant()->AsLongConstant()->GetValue();
6765 __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset),
6766 Immediate(Low32Bits(val)));
6767 codegen_->MaybeRecordImplicitNullCheck(instruction);
6768 __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset + kX86WordSize),
6769 Immediate(High32Bits(val)));
6770 }
6771 break;
6772 }
6773
6774 case DataType::Type::kFloat32: {
6775 uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
6776 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
6777 if (value.IsFpuRegister()) {
6778 __ movss(address, value.AsFpuRegister<XmmRegister>());
6779 } else {
6780 DCHECK(value.IsConstant());
6781 int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
6782 __ movl(address, Immediate(v));
6783 }
6784 codegen_->MaybeRecordImplicitNullCheck(instruction);
6785 break;
6786 }
6787
6788 case DataType::Type::kFloat64: {
6789 uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
6790 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, offset);
6791 if (value.IsFpuRegister()) {
6792 __ movsd(address, value.AsFpuRegister<XmmRegister>());
6793 } else {
6794 DCHECK(value.IsConstant());
6795 Address address_hi =
6796 CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, offset + kX86WordSize);
6797 int64_t v = bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
6798 __ movl(address, Immediate(Low32Bits(v)));
6799 codegen_->MaybeRecordImplicitNullCheck(instruction);
6800 __ movl(address_hi, Immediate(High32Bits(v)));
6801 }
6802 break;
6803 }
6804
6805 case DataType::Type::kUint32:
6806 case DataType::Type::kUint64:
6807 case DataType::Type::kVoid:
6808 LOG(FATAL) << "Unreachable type " << instruction->GetType();
6809 UNREACHABLE();
6810 }
6811 }
6812
6813 void LocationsBuilderX86::VisitArrayLength(HArrayLength* instruction) {
6814 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
6815 locations->SetInAt(0, Location::RequiresRegister());
6816 if (!instruction->IsEmittedAtUseSite()) {
6817 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6818 }
6819 }
6820
6821 void InstructionCodeGeneratorX86::VisitArrayLength(HArrayLength* instruction) {
6822 if (instruction->IsEmittedAtUseSite()) {
6823 return;
6824 }
6825
6826 LocationSummary* locations = instruction->GetLocations();
6827 uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
6828 Register obj = locations->InAt(0).AsRegister<Register>();
6829 Register out = locations->Out().AsRegister<Register>();
6830 __ movl(out, Address(obj, offset));
6831 codegen_->MaybeRecordImplicitNullCheck(instruction);
6832 // Mask out most significant bit in case the array is String's array of char.
6833 if (mirror::kUseStringCompression && instruction->IsStringLength()) {
6834 __ shrl(out, Immediate(1));
6835 }
6836 }
6837
6838 void LocationsBuilderX86::VisitBoundsCheck(HBoundsCheck* instruction) {
6839 RegisterSet caller_saves = RegisterSet::Empty();
6840 InvokeRuntimeCallingConvention calling_convention;
6841 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6842 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
6843 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
6844 locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
6845 HInstruction* length = instruction->InputAt(1);
6846 if (!length->IsEmittedAtUseSite()) {
6847 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6848 }
6849 // Need a register to access the array's length.
6850 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
6851 locations->AddTemp(Location::RequiresRegister());
6852 }
6853 }
6854
6855 void InstructionCodeGeneratorX86::VisitBoundsCheck(HBoundsCheck* instruction) {
6856 const bool is_string_compressed_char_at =
6857 mirror::kUseStringCompression && instruction->IsStringCharAt();
6858 LocationSummary* locations = instruction->GetLocations();
6859 Location index_loc = locations->InAt(0);
6860 Location length_loc = locations->InAt(1);
6861 SlowPathCode* slow_path =
6862 new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathX86(instruction);
6863
6864 if (length_loc.IsConstant()) {
6865 int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
6866 if (index_loc.IsConstant()) {
6867 // BCE will remove the bounds check if we are guaranteed to pass.
6868 int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
6869 if (index < 0 || index >= length) {
6870 codegen_->AddSlowPath(slow_path);
6871 __ jmp(slow_path->GetEntryLabel());
6872 } else {
6873 // Some optimization after BCE may have generated this, and we should not
6874 // generate a bounds check if it is a valid range.
6875 }
6876 return;
6877 }
6878
6879 // We have to reverse the jump condition because the length is the constant.
6880 Register index_reg = index_loc.AsRegister<Register>();
6881 __ cmpl(index_reg, Immediate(length));
6882 codegen_->AddSlowPath(slow_path);
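    // Unsigned comparison: a negative index wraps to a large unsigned value, so it also
    // takes the slow path.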
6883 __ j(kAboveEqual, slow_path->GetEntryLabel());
6884 } else {
6885 HInstruction* array_length = instruction->InputAt(1);
6886 if (array_length->IsEmittedAtUseSite()) {
6887 // Address the length field in the array.
6888 DCHECK(array_length->IsArrayLength());
6889 uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
6890 Location array_loc = array_length->GetLocations()->InAt(0);
6891 Address array_len(array_loc.AsRegister<Register>(), len_offset);
6892 if (is_string_compressed_char_at) {
6893 // TODO: if index_loc.IsConstant(), compare twice the index (to compensate for
6894 // the string compression flag) with the in-memory length and avoid the temporary.
6895 Register length_reg = locations->GetTemp(0).AsRegister<Register>();
6896 __ movl(length_reg, array_len);
6897 codegen_->MaybeRecordImplicitNullCheck(array_length);
6898 __ shrl(length_reg, Immediate(1));
6899 codegen_->GenerateIntCompare(length_reg, index_loc);
6900 } else {
6901 // Checking bounds for general case:
6902 // Array of char, or a String's array when the compression feature is off.
6903 if (index_loc.IsConstant()) {
6904 int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
6905 __ cmpl(array_len, Immediate(value));
6906 } else {
6907 __ cmpl(array_len, index_loc.AsRegister<Register>());
6908 }
6909 codegen_->MaybeRecordImplicitNullCheck(array_length);
6910 }
6911 } else {
6912 codegen_->GenerateIntCompare(length_loc, index_loc);
6913 }
6914 codegen_->AddSlowPath(slow_path);
6915 __ j(kBelowEqual, slow_path->GetEntryLabel());
6916 }
6917 }
6918
6919 void LocationsBuilderX86::VisitParallelMove([[maybe_unused]] HParallelMove* instruction) {
6920 LOG(FATAL) << "Unreachable";
6921 }
6922
6923 void InstructionCodeGeneratorX86::VisitParallelMove(HParallelMove* instruction) {
6924 if (instruction->GetNext()->IsSuspendCheck() &&
6925 instruction->GetBlock()->GetLoopInformation() != nullptr) {
6926 HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
6927 // The back edge will generate the suspend check.
6928 codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
6929 }
6930
6931 codegen_->GetMoveResolver()->EmitNativeCode(instruction);
6932 }
6933
6934 void LocationsBuilderX86::VisitSuspendCheck(HSuspendCheck* instruction) {
6935 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6936 instruction, LocationSummary::kCallOnSlowPath);
6937 // In suspend check slow path, usually there are no caller-save registers at all.
6938 // If SIMD instructions are present, however, we force spilling all live SIMD
6939 // registers in full width (since the runtime only saves/restores lower part).
6940 locations->SetCustomSlowPathCallerSaves(
6941 GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
6942 }
6943
6944 void InstructionCodeGeneratorX86::VisitSuspendCheck(HSuspendCheck* instruction) {
6945 HBasicBlock* block = instruction->GetBlock();
6946 if (block->GetLoopInformation() != nullptr) {
6947 DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
6948 // The back edge will generate the suspend check.
6949 return;
6950 }
6951 if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
6952 // The goto will generate the suspend check.
6953 return;
6954 }
6955 GenerateSuspendCheck(instruction, nullptr);
6956 }
6957
6958 void InstructionCodeGeneratorX86::GenerateSuspendCheck(HSuspendCheck* instruction,
6959 HBasicBlock* successor) {
6960 SuspendCheckSlowPathX86* slow_path =
6961 down_cast<SuspendCheckSlowPathX86*>(instruction->GetSlowPath());
6962 if (slow_path == nullptr) {
6963 slow_path =
6964 new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathX86(instruction, successor);
6965 instruction->SetSlowPath(slow_path);
6966 codegen_->AddSlowPath(slow_path);
6967 if (successor != nullptr) {
6968 DCHECK(successor->IsLoopHeader());
6969 }
6970 } else {
6971 DCHECK_EQ(slow_path->GetSuccessor(), successor);
6972 }
6973
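  // Test the thread flags for a pending suspend or checkpoint request. With no successor,
  // the slow path returns to the label bound below; with a successor (a loop back edge),
  // skip the slow path entirely when no request is pending.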
6974 __ fs()->testl(Address::Absolute(Thread::ThreadFlagsOffset<kX86PointerSize>().Int32Value()),
6975 Immediate(Thread::SuspendOrCheckpointRequestFlags()));
6976 if (successor == nullptr) {
6977 __ j(kNotZero, slow_path->GetEntryLabel());
6978 __ Bind(slow_path->GetReturnLabel());
6979 } else {
6980 __ j(kZero, codegen_->GetLabelOf(successor));
6981 __ jmp(slow_path->GetEntryLabel());
6982 }
6983 }
6984
6985 X86Assembler* ParallelMoveResolverX86::GetAssembler() const {
6986 return codegen_->GetAssembler();
6987 }
6988
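// Copies `number_of_words` 32-bit words from one stack location to another using a
// (possibly spilled) core scratch register.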
6989 void ParallelMoveResolverX86::MoveMemoryToMemory(int dst, int src, int number_of_words) {
6990 ScratchRegisterScope ensure_scratch(
6991 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
6992 Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
6993 int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
6994
6995 // Now that temp register is available (possibly spilled), move blocks of memory.
6996 for (int i = 0; i < number_of_words; i++) {
6997 __ movl(temp_reg, Address(ESP, src + stack_offset));
6998 __ movl(Address(ESP, dst + stack_offset), temp_reg);
6999 stack_offset += kX86WordSize;
7000 }
7001 }
7002
7003 void ParallelMoveResolverX86::EmitMove(size_t index) {
7004 MoveOperands* move = moves_[index];
7005 Location source = move->GetSource();
7006 Location destination = move->GetDestination();
7007
7008 if (source.IsRegister()) {
7009 if (destination.IsRegister()) {
7010 __ movl(destination.AsRegister<Register>(), source.AsRegister<Register>());
7011 } else if (destination.IsFpuRegister()) {
7012 __ movd(destination.AsFpuRegister<XmmRegister>(), source.AsRegister<Register>());
7013 } else {
7014 DCHECK(destination.IsStackSlot());
7015 __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegister<Register>());
7016 }
7017 } else if (source.IsRegisterPair()) {
7018 if (destination.IsRegisterPair()) {
7019 __ movl(destination.AsRegisterPairLow<Register>(), source.AsRegisterPairLow<Register>());
7020 DCHECK_NE(destination.AsRegisterPairLow<Register>(), source.AsRegisterPairHigh<Register>());
7021 __ movl(destination.AsRegisterPairHigh<Register>(), source.AsRegisterPairHigh<Register>());
7022 } else if (destination.IsFpuRegister()) {
7023 size_t elem_size = DataType::Size(DataType::Type::kInt32);
7024 // Push the 2 source registers to the stack.
7025 __ pushl(source.AsRegisterPairHigh<Register>());
7026 __ cfi().AdjustCFAOffset(elem_size);
7027 __ pushl(source.AsRegisterPairLow<Register>());
7028 __ cfi().AdjustCFAOffset(elem_size);
7029 // Load the destination register.
7030 __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
7031 // And remove the temporary stack space we allocated.
7032 codegen_->DecreaseFrame(2 * elem_size);
7033 } else {
7034 DCHECK(destination.IsDoubleStackSlot());
7035 __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegisterPairLow<Register>());
7036 __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
7037 source.AsRegisterPairHigh<Register>());
7038 }
7039 } else if (source.IsFpuRegister()) {
7040 if (destination.IsRegister()) {
7041 __ movd(destination.AsRegister<Register>(), source.AsFpuRegister<XmmRegister>());
7042 } else if (destination.IsFpuRegister()) {
7043 __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
7044 } else if (destination.IsRegisterPair()) {
7045 size_t elem_size = DataType::Size(DataType::Type::kInt32);
7046 // Create stack space for 2 elements.
7047 codegen_->IncreaseFrame(2 * elem_size);
7048 // Store the source register.
7049 __ movsd(Address(ESP, 0), source.AsFpuRegister<XmmRegister>());
7050 // And pop the values into destination registers.
7051 __ popl(destination.AsRegisterPairLow<Register>());
7052 __ cfi().AdjustCFAOffset(-elem_size);
7053 __ popl(destination.AsRegisterPairHigh<Register>());
7054 __ cfi().AdjustCFAOffset(-elem_size);
7055 } else if (destination.IsStackSlot()) {
7056 __ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
7057 } else if (destination.IsDoubleStackSlot()) {
7058 __ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
7059 } else {
7060 DCHECK(destination.IsSIMDStackSlot());
7061 __ movups(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
7062 }
7063 } else if (source.IsStackSlot()) {
7064 if (destination.IsRegister()) {
7065 __ movl(destination.AsRegister<Register>(), Address(ESP, source.GetStackIndex()));
7066 } else if (destination.IsFpuRegister()) {
7067 __ movss(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
7068 } else {
7069 DCHECK(destination.IsStackSlot());
7070 MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 1);
7071 }
7072 } else if (source.IsDoubleStackSlot()) {
7073 if (destination.IsRegisterPair()) {
7074 __ movl(destination.AsRegisterPairLow<Register>(), Address(ESP, source.GetStackIndex()));
7075 __ movl(destination.AsRegisterPairHigh<Register>(),
7076 Address(ESP, source.GetHighStackIndex(kX86WordSize)));
7077 } else if (destination.IsFpuRegister()) {
7078 __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
7079 } else {
7080 DCHECK(destination.IsDoubleStackSlot()) << destination;
7081 MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 2);
7082 }
7083 } else if (source.IsSIMDStackSlot()) {
7084 if (destination.IsFpuRegister()) {
7085 __ movups(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
7086 } else {
7087 DCHECK(destination.IsSIMDStackSlot());
7088 MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 4);
7089 }
7090 } else if (source.IsConstant()) {
7091 HConstant* constant = source.GetConstant();
7092 if (constant->IsIntConstant() || constant->IsNullConstant()) {
7093 int32_t value = CodeGenerator::GetInt32ValueOf(constant);
7094 if (destination.IsRegister()) {
7095 if (value == 0) {
7096 __ xorl(destination.AsRegister<Register>(), destination.AsRegister<Register>());
7097 } else {
7098 __ movl(destination.AsRegister<Register>(), Immediate(value));
7099 }
7100 } else {
7101 DCHECK(destination.IsStackSlot()) << destination;
7102 __ movl(Address(ESP, destination.GetStackIndex()), Immediate(value));
7103 }
7104 } else if (constant->IsFloatConstant()) {
7105 float fp_value = constant->AsFloatConstant()->GetValue();
7106 int32_t value = bit_cast<int32_t, float>(fp_value);
7107 Immediate imm(value);
7108 if (destination.IsFpuRegister()) {
7109 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
7110 if (value == 0) {
7111 // Easy handling of 0.0.
7112 __ xorps(dest, dest);
7113 } else {
7114 ScratchRegisterScope ensure_scratch(
7115 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
7116 Register temp = static_cast<Register>(ensure_scratch.GetRegister());
7117 __ movl(temp, Immediate(value));
7118 __ movd(dest, temp);
7119 }
7120 } else {
7121 DCHECK(destination.IsStackSlot()) << destination;
7122 __ movl(Address(ESP, destination.GetStackIndex()), imm);
7123 }
7124 } else if (constant->IsLongConstant()) {
7125 int64_t value = constant->AsLongConstant()->GetValue();
7126 int32_t low_value = Low32Bits(value);
7127 int32_t high_value = High32Bits(value);
7128 Immediate low(low_value);
7129 Immediate high(high_value);
7130 if (destination.IsDoubleStackSlot()) {
7131 __ movl(Address(ESP, destination.GetStackIndex()), low);
7132 __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), high);
7133 } else {
7134 __ movl(destination.AsRegisterPairLow<Register>(), low);
7135 __ movl(destination.AsRegisterPairHigh<Register>(), high);
7136 }
7137 } else {
7138 DCHECK(constant->IsDoubleConstant());
7139 double dbl_value = constant->AsDoubleConstant()->GetValue();
7140 int64_t value = bit_cast<int64_t, double>(dbl_value);
7141 int32_t low_value = Low32Bits(value);
7142 int32_t high_value = High32Bits(value);
7143 Immediate low(low_value);
7144 Immediate high(high_value);
7145 if (destination.IsFpuRegister()) {
7146 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
7147 if (value == 0) {
7148 // Easy handling of 0.0.
7149 __ xorpd(dest, dest);
7150 } else {
7151 __ pushl(high);
7152 __ cfi().AdjustCFAOffset(4);
7153 __ pushl(low);
7154 __ cfi().AdjustCFAOffset(4);
7155 __ movsd(dest, Address(ESP, 0));
7156 codegen_->DecreaseFrame(8);
7157 }
7158 } else {
7159 DCHECK(destination.IsDoubleStackSlot()) << destination;
7160 __ movl(Address(ESP, destination.GetStackIndex()), low);
7161 __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), high);
7162 }
7163 }
7164 } else {
7165 LOG(FATAL) << "Unimplemented move: " << destination << " <- " << source;
7166 }
7167 }
7168
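// Swaps a core register with a 32-bit stack slot via a scratch register distinct from `reg`.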
7169 void ParallelMoveResolverX86::Exchange(Register reg, int mem) {
7170 Register suggested_scratch = reg == EAX ? EBX : EAX;
7171 ScratchRegisterScope ensure_scratch(
7172 this, reg, suggested_scratch, codegen_->GetNumberOfCoreRegisters());
7173
7174 int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
7175 __ movl(static_cast<Register>(ensure_scratch.GetRegister()), Address(ESP, mem + stack_offset));
7176 __ movl(Address(ESP, mem + stack_offset), reg);
7177 __ movl(reg, static_cast<Register>(ensure_scratch.GetRegister()));
7178 }
7179
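// Swaps a 32-bit stack slot with an XMM register holding a single-precision value,
// using a core scratch register.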
7180 void ParallelMoveResolverX86::Exchange32(XmmRegister reg, int mem) {
7181 ScratchRegisterScope ensure_scratch(
7182 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
7183
7184 Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
7185 int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
7186 __ movl(temp_reg, Address(ESP, mem + stack_offset));
7187 __ movss(Address(ESP, mem + stack_offset), reg);
7188 __ movd(reg, temp_reg);
7189 }
7190
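// Swaps a 128-bit XMM register with a 16-byte SIMD stack slot by temporarily extending
// the frame to stage the register contents.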
7191 void ParallelMoveResolverX86::Exchange128(XmmRegister reg, int mem) {
7192 size_t extra_slot = 4 * kX86WordSize;
7193 codegen_->IncreaseFrame(extra_slot);
7194 __ movups(Address(ESP, 0), XmmRegister(reg));
7195 ExchangeMemory(0, mem + extra_slot, 4);
7196 __ movups(XmmRegister(reg), Address(ESP, 0));
7197 codegen_->DecreaseFrame(extra_slot);
7198 }
7199
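// Swaps `number_of_words` 32-bit words between two stack locations using two core
// scratch registers.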
7200 void ParallelMoveResolverX86::ExchangeMemory(int mem1, int mem2, int number_of_words) {
7201 ScratchRegisterScope ensure_scratch1(
7202 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
7203
7204 Register suggested_scratch = ensure_scratch1.GetRegister() == EAX ? EBX : EAX;
7205 ScratchRegisterScope ensure_scratch2(
7206 this, ensure_scratch1.GetRegister(), suggested_scratch, codegen_->GetNumberOfCoreRegisters());
7207
7208 int stack_offset = ensure_scratch1.IsSpilled() ? kX86WordSize : 0;
7209 stack_offset += ensure_scratch2.IsSpilled() ? kX86WordSize : 0;
7210
7211 // Now that temp registers are available (possibly spilled), exchange blocks of memory.
7212 for (int i = 0; i < number_of_words; i++) {
7213 __ movl(static_cast<Register>(ensure_scratch1.GetRegister()), Address(ESP, mem1 + stack_offset));
7214 __ movl(static_cast<Register>(ensure_scratch2.GetRegister()), Address(ESP, mem2 + stack_offset));
7215 __ movl(Address(ESP, mem2 + stack_offset), static_cast<Register>(ensure_scratch1.GetRegister()));
7216 __ movl(Address(ESP, mem1 + stack_offset), static_cast<Register>(ensure_scratch2.GetRegister()));
7217 stack_offset += kX86WordSize;
7218 }
7219 }
7220
7221 void ParallelMoveResolverX86::EmitSwap(size_t index) {
7222 MoveOperands* move = moves_[index];
7223 Location source = move->GetSource();
7224 Location destination = move->GetDestination();
7225
7226 if (source.IsRegister() && destination.IsRegister()) {
7227 // Use XOR swap algorithm to avoid serializing XCHG instruction or using a temporary.
7228 DCHECK_NE(destination.AsRegister<Register>(), source.AsRegister<Register>());
7229 __ xorl(destination.AsRegister<Register>(), source.AsRegister<Register>());
7230 __ xorl(source.AsRegister<Register>(), destination.AsRegister<Register>());
7231 __ xorl(destination.AsRegister<Register>(), source.AsRegister<Register>());
7232 } else if (source.IsRegister() && destination.IsStackSlot()) {
7233 Exchange(source.AsRegister<Register>(), destination.GetStackIndex());
7234 } else if (source.IsStackSlot() && destination.IsRegister()) {
7235 Exchange(destination.AsRegister<Register>(), source.GetStackIndex());
7236 } else if (source.IsStackSlot() && destination.IsStackSlot()) {
7237 ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 1);
7238 } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
7239 // Use XOR Swap algorithm to avoid a temporary.
7240 DCHECK_NE(source.reg(), destination.reg());
7241 __ xorpd(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
7242 __ xorpd(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>());
7243 __ xorpd(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
7244 } else if (source.IsFpuRegister() && destination.IsStackSlot()) {
7245 Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
7246 } else if (destination.IsFpuRegister() && source.IsStackSlot()) {
7247 Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
7248 } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) {
7249 // Take advantage of the 16 bytes in the XMM register.
7250 XmmRegister reg = source.AsFpuRegister<XmmRegister>();
7251 Address stack(ESP, destination.GetStackIndex());
7252 // Load the double into the high doubleword.
7253 __ movhpd(reg, stack);
7254
7255 // Store the low double into the destination.
7256 __ movsd(stack, reg);
7257
7258 // Move the high double to the low double.
7259 __ psrldq(reg, Immediate(8));
7260 } else if (destination.IsFpuRegister() && source.IsDoubleStackSlot()) {
7261 // Take advantage of the 16 bytes in the XMM register.
7262 XmmRegister reg = destination.AsFpuRegister<XmmRegister>();
7263 Address stack(ESP, source.GetStackIndex());
7264 // Load the double into the high doubleword.
7265 __ movhpd(reg, stack);
7266
7267 // Store the low double into the destination.
7268 __ movsd(stack, reg);
7269
7270 // Move the high double to the low double.
7271 __ psrldq(reg, Immediate(8));
7272 } else if (destination.IsDoubleStackSlot() && source.IsDoubleStackSlot()) {
7273 ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 2);
7274 } else if (source.IsSIMDStackSlot() && destination.IsSIMDStackSlot()) {
7275 ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 4);
7276 } else if (source.IsFpuRegister() && destination.IsSIMDStackSlot()) {
7277 Exchange128(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
7278 } else if (destination.IsFpuRegister() && source.IsSIMDStackSlot()) {
7279 Exchange128(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
7280 } else {
7281 LOG(FATAL) << "Unimplemented: source: " << source << ", destination: " << destination;
7282 }
7283 }
7284
7285 void ParallelMoveResolverX86::SpillScratch(int reg) {
7286 __ pushl(static_cast<Register>(reg));
7287 }
7288
7289 void ParallelMoveResolverX86::RestoreScratch(int reg) {
7290 __ popl(static_cast<Register>(reg));
7291 }
7292
7293 HLoadClass::LoadKind CodeGeneratorX86::GetSupportedLoadClassKind(
7294 HLoadClass::LoadKind desired_class_load_kind) {
7295 switch (desired_class_load_kind) {
7296 case HLoadClass::LoadKind::kInvalid:
7297 LOG(FATAL) << "UNREACHABLE";
7298 UNREACHABLE();
7299 case HLoadClass::LoadKind::kReferrersClass:
7300 break;
7301 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
7302 case HLoadClass::LoadKind::kBootImageRelRo:
7303 case HLoadClass::LoadKind::kAppImageRelRo:
7304 case HLoadClass::LoadKind::kBssEntry:
7305 case HLoadClass::LoadKind::kBssEntryPublic:
7306 case HLoadClass::LoadKind::kBssEntryPackage:
7307 DCHECK(!GetCompilerOptions().IsJitCompiler());
7308 break;
7309 case HLoadClass::LoadKind::kJitBootImageAddress:
7310 case HLoadClass::LoadKind::kJitTableAddress:
7311 DCHECK(GetCompilerOptions().IsJitCompiler());
7312 break;
7313 case HLoadClass::LoadKind::kRuntimeCall:
7314 break;
7315 }
7316 return desired_class_load_kind;
7317 }
7318
7319 void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) {
7320 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
7321 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
7322 InvokeRuntimeCallingConvention calling_convention;
7323 CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
7324 cls,
7325 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
7326 Location::RegisterLocation(EAX));
7327 DCHECK_EQ(calling_convention.GetRegisterAt(0), EAX);
7328 return;
7329 }
7330 DCHECK_EQ(cls->NeedsAccessCheck(),
7331 load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
7332 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
7333
7334 const bool requires_read_barrier = !cls->IsInImage() && codegen_->EmitReadBarrier();
7335 LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
7336 ? LocationSummary::kCallOnSlowPath
7337 : LocationSummary::kNoCall;
7338 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind);
7339 if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
7340 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
7341 }
7342
7343 if (load_kind == HLoadClass::LoadKind::kReferrersClass || cls->HasPcRelativeLoadKind()) {
7344 locations->SetInAt(0, Location::RequiresRegister());
7345 }
7346 locations->SetOut(Location::RequiresRegister());
7347 if (call_kind == LocationSummary::kCallOnSlowPath && cls->HasPcRelativeLoadKind()) {
7348 if (codegen_->EmitNonBakerReadBarrier()) {
7349 // For non-Baker read barrier we have a temp-clobbering call.
7350 } else {
7351 // Rely on the type resolution and/or initialization to save everything.
7352 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
7353 }
7354 }
7355 }
7356
7357 Label* CodeGeneratorX86::NewJitRootClassPatch(const DexFile& dex_file,
7358 dex::TypeIndex type_index,
7359 Handle<mirror::Class> handle) {
7360 ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
7361 // Add a patch entry and return the label.
7362 jit_class_patches_.emplace_back(&dex_file, type_index.index_);
7363 PatchInfo<Label>* info = &jit_class_patches_.back();
7364 return &info->label;
7365 }
7366
7367 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
7368 // move.
7369 void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
7370 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
7371 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
7372 codegen_->GenerateLoadClassRuntimeCall(cls);
7373 return;
7374 }
7375 DCHECK_EQ(cls->NeedsAccessCheck(),
7376 load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
7377 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
7378
7379 LocationSummary* locations = cls->GetLocations();
7380 Location out_loc = locations->Out();
7381 Register out = out_loc.AsRegister<Register>();
7382
7383 bool generate_null_check = false;
7384 const ReadBarrierOption read_barrier_option =
7385 cls->IsInImage() ? kWithoutReadBarrier : codegen_->GetCompilerReadBarrierOption();
7386 switch (load_kind) {
7387 case HLoadClass::LoadKind::kReferrersClass: {
7388 DCHECK(!cls->CanCallRuntime());
7389 DCHECK(!cls->MustGenerateClinitCheck());
7390 // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
7391 Register current_method = locations->InAt(0).AsRegister<Register>();
7392 GenerateGcRootFieldLoad(
7393 cls,
7394 out_loc,
7395 Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()),
7396 /* fixup_label= */ nullptr,
7397 read_barrier_option);
7398 break;
7399 }
7400 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
7401 DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
7402 codegen_->GetCompilerOptions().IsBootImageExtension());
7403 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
7404 Register method_address = locations->InAt(0).AsRegister<Register>();
7405 __ leal(out, Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset));
7406 codegen_->RecordBootImageTypePatch(cls);
7407 break;
7408 }
7409 case HLoadClass::LoadKind::kBootImageRelRo: {
7410 DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
7411 Register method_address = locations->InAt(0).AsRegister<Register>();
7412 __ movl(out, Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset));
7413 codegen_->RecordBootImageRelRoPatch(cls->InputAt(0)->AsX86ComputeBaseMethodAddress(),
7414 CodeGenerator::GetBootImageOffset(cls));
7415 break;
7416 }
7417 case HLoadClass::LoadKind::kAppImageRelRo: {
7418 DCHECK(codegen_->GetCompilerOptions().IsAppImage());
7419 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
7420 Register method_address = locations->InAt(0).AsRegister<Register>();
7421 __ movl(out, Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset));
7422 codegen_->RecordAppImageTypePatch(cls);
7423 break;
7424 }
7425 case HLoadClass::LoadKind::kBssEntry:
7426 case HLoadClass::LoadKind::kBssEntryPublic:
7427 case HLoadClass::LoadKind::kBssEntryPackage: {
7428 Register method_address = locations->InAt(0).AsRegister<Register>();
7429 Address address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset);
7430 Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls);
7431 GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
7432 // No need for memory fence, thanks to the x86 memory model.
7433 generate_null_check = true;
7434 break;
7435 }
7436 case HLoadClass::LoadKind::kJitBootImageAddress: {
7437 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
7438 uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get());
7439 DCHECK_NE(address, 0u);
7440 __ movl(out, Immediate(address));
7441 break;
7442 }
7443 case HLoadClass::LoadKind::kJitTableAddress: {
7444 Address address = Address::Absolute(CodeGeneratorX86::kPlaceholder32BitOffset);
7445 Label* fixup_label = codegen_->NewJitRootClassPatch(
7446 cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass());
7447 // /* GcRoot<mirror::Class> */ out = *address
7448 GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
7449 break;
7450 }
7451 case HLoadClass::LoadKind::kRuntimeCall:
7452 case HLoadClass::LoadKind::kInvalid:
7453 LOG(FATAL) << "UNREACHABLE";
7454 UNREACHABLE();
7455 }
7456
7457 if (generate_null_check || cls->MustGenerateClinitCheck()) {
7458 DCHECK(cls->CanCallRuntime());
7459 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86(cls, cls);
7460 codegen_->AddSlowPath(slow_path);
7461
7462 if (generate_null_check) {
7463 __ testl(out, out);
7464 __ j(kEqual, slow_path->GetEntryLabel());
7465 }
7466
7467 if (cls->MustGenerateClinitCheck()) {
7468 GenerateClassInitializationCheck(slow_path, out);
7469 } else {
7470 __ Bind(slow_path->GetExitLabel());
7471 }
7472 }
7473 }
7474
7475 void LocationsBuilderX86::VisitLoadMethodHandle(HLoadMethodHandle* load) {
7476 InvokeRuntimeCallingConvention calling_convention;
7477 Location location = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
7478 CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location);
7479 }
7480
7481 void InstructionCodeGeneratorX86::VisitLoadMethodHandle(HLoadMethodHandle* load) {
7482 codegen_->GenerateLoadMethodHandleRuntimeCall(load);
7483 }
7484
7485 void LocationsBuilderX86::VisitLoadMethodType(HLoadMethodType* load) {
7486 InvokeRuntimeCallingConvention calling_convention;
7487 Location location = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
7488 CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location);
7489 }
7490
7491 void InstructionCodeGeneratorX86::VisitLoadMethodType(HLoadMethodType* load) {
7492 codegen_->GenerateLoadMethodTypeRuntimeCall(load);
7493 }
7494
7495 void LocationsBuilderX86::VisitClinitCheck(HClinitCheck* check) {
7496 LocationSummary* locations =
7497 new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
7498 locations->SetInAt(0, Location::RequiresRegister());
7499 if (check->HasUses()) {
7500 locations->SetOut(Location::SameAsFirstInput());
7501 }
7502 // Rely on the type initialization to save everything we need.
7503 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
7504 }
7505
7506 void InstructionCodeGeneratorX86::VisitClinitCheck(HClinitCheck* check) {
7507 // We assume the class is not null.
7508 SlowPathCode* slow_path =
7509 new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86(check->GetLoadClass(), check);
7510 codegen_->AddSlowPath(slow_path);
7511 GenerateClassInitializationCheck(slow_path,
7512 check->GetLocations()->InAt(0).AsRegister<Register>());
7513 }
7514
7515 void InstructionCodeGeneratorX86::GenerateClassInitializationCheck(
7516 SlowPathCode* slow_path, Register class_reg) {
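  // A class whose status byte is below the shifted "visibly initialized" value still
  // needs initialization, so take the slow path.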
7517 __ cmpb(Address(class_reg, kClassStatusByteOffset), Immediate(kShiftedVisiblyInitializedValue));
7518 __ j(kBelow, slow_path->GetEntryLabel());
7519 __ Bind(slow_path->GetExitLabel());
7520 }
7521
7522 void InstructionCodeGeneratorX86::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
7523 Register temp) {
7524 uint32_t path_to_root = check->GetBitstringPathToRoot();
7525 uint32_t mask = check->GetBitstringMask();
7526 DCHECK(IsPowerOfTwo(mask + 1));
7527 size_t mask_bits = WhichPowerOf2(mask + 1);
7528
7529 if (mask_bits == 16u) {
7530 // Compare the bitstring in memory.
7531 __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root));
7532 } else {
7533 // /* uint32_t */ temp = temp->status_
7534 __ movl(temp, Address(temp, mirror::Class::StatusOffset()));
7535 // Compare the bitstring bits using SUB.
7536 __ subl(temp, Immediate(path_to_root));
7537 // Shift out bits that do not contribute to the comparison.
7538 __ shll(temp, Immediate(32u - mask_bits));
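    // The zero flag is now set iff the bitstring bits of the status word equal `path_to_root`.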
7539 }
7540 }
7541
7542 HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind(
7543 HLoadString::LoadKind desired_string_load_kind) {
7544 switch (desired_string_load_kind) {
7545 case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
7546 case HLoadString::LoadKind::kBootImageRelRo:
7547 case HLoadString::LoadKind::kBssEntry:
7548 DCHECK(!GetCompilerOptions().IsJitCompiler());
7549 break;
7550 case HLoadString::LoadKind::kJitBootImageAddress:
7551 case HLoadString::LoadKind::kJitTableAddress:
7552 DCHECK(GetCompilerOptions().IsJitCompiler());
7553 break;
7554 case HLoadString::LoadKind::kRuntimeCall:
7555 break;
7556 }
7557 return desired_string_load_kind;
7558 }
7559
7560 void LocationsBuilderX86::VisitLoadString(HLoadString* load) {
7561 LocationSummary::CallKind call_kind = codegen_->GetLoadStringCallKind(load);
7562 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
7563 HLoadString::LoadKind load_kind = load->GetLoadKind();
7564 if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative ||
7565 load_kind == HLoadString::LoadKind::kBootImageRelRo ||
7566 load_kind == HLoadString::LoadKind::kBssEntry) {
7567 locations->SetInAt(0, Location::RequiresRegister());
7568 }
7569 if (load_kind == HLoadString::LoadKind::kRuntimeCall) {
7570 locations->SetOut(Location::RegisterLocation(EAX));
7571 } else {
7572 locations->SetOut(Location::RequiresRegister());
7573 if (load_kind == HLoadString::LoadKind::kBssEntry) {
7574 if (codegen_->EmitNonBakerReadBarrier()) {
7575 // For non-Baker read barrier we have a temp-clobbering call.
7576 } else {
7577 // Rely on the pResolveString to save everything.
7578 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
7579 }
7580 }
7581 }
7582 }
7583
7584 Label* CodeGeneratorX86::NewJitRootStringPatch(const DexFile& dex_file,
7585 dex::StringIndex string_index,
7586 Handle<mirror::String> handle) {
7587 ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
7588 // Add a patch entry and return the label.
7589 jit_string_patches_.emplace_back(&dex_file, string_index.index_);
7590 PatchInfo<Label>* info = &jit_string_patches_.back();
7591 return &info->label;
7592 }
7593
7594 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
7595 // move.
7596 void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
7597 LocationSummary* locations = load->GetLocations();
7598 Location out_loc = locations->Out();
7599 Register out = out_loc.AsRegister<Register>();
7600
7601 switch (load->GetLoadKind()) {
7602 case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
7603 DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
7604 codegen_->GetCompilerOptions().IsBootImageExtension());
7605 Register method_address = locations->InAt(0).AsRegister<Register>();
7606 __ leal(out, Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset));
7607 codegen_->RecordBootImageStringPatch(load);
7608 return;
7609 }
7610 case HLoadString::LoadKind::kBootImageRelRo: {
7611 DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
7612 Register method_address = locations->InAt(0).AsRegister<Register>();
7613 __ movl(out, Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset));
7614 codegen_->RecordBootImageRelRoPatch(load->InputAt(0)->AsX86ComputeBaseMethodAddress(),
7615 CodeGenerator::GetBootImageOffset(load));
7616 return;
7617 }
7618 case HLoadString::LoadKind::kBssEntry: {
7619 Register method_address = locations->InAt(0).AsRegister<Register>();
7620 Address address = Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset);
7621 Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
7622 // /* GcRoot<mirror::String> */ out = *address /* PC-relative */
7623 GenerateGcRootFieldLoad(
7624 load, out_loc, address, fixup_label, codegen_->GetCompilerReadBarrierOption());
7625 // No need for memory fence, thanks to the x86 memory model.
7626 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86(load);
7627 codegen_->AddSlowPath(slow_path);
7628 __ testl(out, out);
7629 __ j(kEqual, slow_path->GetEntryLabel());
7630 __ Bind(slow_path->GetExitLabel());
7631 return;
7632 }
7633 case HLoadString::LoadKind::kJitBootImageAddress: {
7634 uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get());
7635 DCHECK_NE(address, 0u);
7636 __ movl(out, Immediate(address));
7637 return;
7638 }
7639 case HLoadString::LoadKind::kJitTableAddress: {
7640 Address address = Address::Absolute(CodeGeneratorX86::kPlaceholder32BitOffset);
7641 Label* fixup_label = codegen_->NewJitRootStringPatch(
7642 load->GetDexFile(), load->GetStringIndex(), load->GetString());
7643 // /* GcRoot<mirror::String> */ out = *address
7644 GenerateGcRootFieldLoad(
7645 load, out_loc, address, fixup_label, codegen_->GetCompilerReadBarrierOption());
7646 return;
7647 }
7648 default:
7649 break;
7650 }
7651
7652 InvokeRuntimeCallingConvention calling_convention;
7653 DCHECK_EQ(calling_convention.GetRegisterAt(0), out);
7654 __ movl(calling_convention.GetRegisterAt(0), Immediate(load->GetStringIndex().index_));
7655 codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
7656 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
7657 }
7658
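// Returns the thread-local address of the pending exception, accessed via the FS segment
// register at the call sites below.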
7659 static Address GetExceptionTlsAddress() {
7660 return Address::Absolute(Thread::ExceptionOffset<kX86PointerSize>().Int32Value());
7661 }
7662
7663 void LocationsBuilderX86::VisitLoadException(HLoadException* load) {
7664 LocationSummary* locations =
7665 new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall);
7666 locations->SetOut(Location::RequiresRegister());
7667 }
7668
7669 void InstructionCodeGeneratorX86::VisitLoadException(HLoadException* load) {
7670 __ fs()->movl(load->GetLocations()->Out().AsRegister<Register>(), GetExceptionTlsAddress());
7671 }
7672
7673 void LocationsBuilderX86::VisitClearException(HClearException* clear) {
7674 new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
7675 }
7676
7677 void InstructionCodeGeneratorX86::VisitClearException([[maybe_unused]] HClearException* clear) {
7678 __ fs()->movl(GetExceptionTlsAddress(), Immediate(0));
7679 }
7680
7681 void LocationsBuilderX86::VisitThrow(HThrow* instruction) {
7682 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
7683 instruction, LocationSummary::kCallOnMainOnly);
7684 InvokeRuntimeCallingConvention calling_convention;
7685 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
7686 }
7687
7688 void InstructionCodeGeneratorX86::VisitThrow(HThrow* instruction) {
7689 codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
7690 CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
7691 }
7692
7693 // Temp is used for read barrier.
7694 static size_t NumberOfInstanceOfTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) {
7695 if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
7696 return 1;
7697 }
7698 if (emit_read_barrier &&
7699 !kUseBakerReadBarrier &&
7700 (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
7701 type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
7702 type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
7703 return 1;
7704 }
7705 return 0;
7706 }
7707
7708 // Interface case has 2 temps: one to hold the number of interfaces and one for the current
7709 // interface pointer; the current interface is compared in memory.
7710 // The other checks have one temp for loading the object's class.
7711 static size_t NumberOfCheckCastTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) {
7712 return 1 + NumberOfInstanceOfTemps(emit_read_barrier, type_check_kind);
7713 }
7714
7715 void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) {
7716 LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
7717 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7718 bool baker_read_barrier_slow_path = false;
7719 switch (type_check_kind) {
7720 case TypeCheckKind::kExactCheck:
7721 case TypeCheckKind::kAbstractClassCheck:
7722 case TypeCheckKind::kClassHierarchyCheck:
7723 case TypeCheckKind::kArrayObjectCheck:
7724 case TypeCheckKind::kInterfaceCheck: {
7725 bool needs_read_barrier = codegen_->InstanceOfNeedsReadBarrier(instruction);
7726 call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
7727 baker_read_barrier_slow_path = (kUseBakerReadBarrier && needs_read_barrier) &&
7728 (type_check_kind != TypeCheckKind::kInterfaceCheck);
7729 break;
7730 }
7731 case TypeCheckKind::kArrayCheck:
7732 case TypeCheckKind::kUnresolvedCheck:
7733 call_kind = LocationSummary::kCallOnSlowPath;
7734 break;
7735 case TypeCheckKind::kBitstringCheck:
7736 break;
7737 }
7738
7739 LocationSummary* locations =
7740 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
7741 if (baker_read_barrier_slow_path) {
7742 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
7743 }
7744 locations->SetInAt(0, Location::RequiresRegister());
7745 if (type_check_kind == TypeCheckKind::kBitstringCheck) {
7746 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
7747 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
7748 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
7749 } else if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
7750 locations->SetInAt(1, Location::RequiresRegister());
7751 } else {
7752 locations->SetInAt(1, Location::Any());
7753 }
7754 // Note that TypeCheckSlowPathX86 uses this "out" register too.
7755 locations->SetOut(Location::RequiresRegister());
7756 // When read barriers are enabled, we need a temporary register for some cases.
7757 locations->AddRegisterTemps(
7758 NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind));
7759 }
7760
7761 void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
7762 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7763 LocationSummary* locations = instruction->GetLocations();
7764 Location obj_loc = locations->InAt(0);
7765 Register obj = obj_loc.AsRegister<Register>();
7766 Location cls = locations->InAt(1);
7767 Location out_loc = locations->Out();
7768 Register out = out_loc.AsRegister<Register>();
7769 const size_t num_temps = NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind);
7770 DCHECK_LE(num_temps, 1u);
7771 Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
7772 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
7773 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
7774 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
7775 const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
7776 const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
7777 const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
7778 const uint32_t object_array_data_offset =
7779 mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
7780 SlowPathCode* slow_path = nullptr;
7781 NearLabel done, zero;
7782
7783 // Return 0 if `obj` is null.
7784 // Avoid null check if we know obj is not null.
7785 if (instruction->MustDoNullCheck()) {
7786 __ testl(obj, obj);
7787 __ j(kEqual, &zero);
7788 }
7789
7790 switch (type_check_kind) {
7791 case TypeCheckKind::kExactCheck: {
7792 ReadBarrierOption read_barrier_option =
7793 codegen_->ReadBarrierOptionForInstanceOf(instruction);
7794 // /* HeapReference<Class> */ out = obj->klass_
7795 GenerateReferenceLoadTwoRegisters(instruction,
7796 out_loc,
7797 obj_loc,
7798 class_offset,
7799 read_barrier_option);
7800 if (cls.IsRegister()) {
7801 __ cmpl(out, cls.AsRegister<Register>());
7802 } else {
7803 DCHECK(cls.IsStackSlot()) << cls;
7804 __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7805 }
7806
7807 // Classes must be equal for the instanceof to succeed.
7808 __ j(kNotEqual, &zero);
7809 __ movl(out, Immediate(1));
7810 __ jmp(&done);
7811 break;
7812 }
7813
7814 case TypeCheckKind::kAbstractClassCheck: {
7815 ReadBarrierOption read_barrier_option =
7816 codegen_->ReadBarrierOptionForInstanceOf(instruction);
7817 // /* HeapReference<Class> */ out = obj->klass_
7818 GenerateReferenceLoadTwoRegisters(instruction,
7819 out_loc,
7820 obj_loc,
7821 class_offset,
7822 read_barrier_option);
7823 // If the class is abstract, we eagerly fetch the super class of the
7824 // object to avoid doing a comparison we know will fail.
7825 NearLabel loop;
7826 __ Bind(&loop);
7827 // /* HeapReference<Class> */ out = out->super_class_
7828 GenerateReferenceLoadOneRegister(instruction,
7829 out_loc,
7830 super_offset,
7831 maybe_temp_loc,
7832 read_barrier_option);
7833 __ testl(out, out);
7834 // If `out` is null, we use it for the result, and jump to `done`.
7835 __ j(kEqual, &done);
7836 if (cls.IsRegister()) {
7837 __ cmpl(out, cls.AsRegister<Register>());
7838 } else {
7839 DCHECK(cls.IsStackSlot()) << cls;
7840 __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7841 }
7842 __ j(kNotEqual, &loop);
7843 __ movl(out, Immediate(1));
7844 if (zero.IsLinked()) {
7845 __ jmp(&done);
7846 }
7847 break;
7848 }
7849
7850 case TypeCheckKind::kClassHierarchyCheck: {
7851 ReadBarrierOption read_barrier_option =
7852 codegen_->ReadBarrierOptionForInstanceOf(instruction);
7853 // /* HeapReference<Class> */ out = obj->klass_
7854 GenerateReferenceLoadTwoRegisters(instruction,
7855 out_loc,
7856 obj_loc,
7857 class_offset,
7858 read_barrier_option);
7859 // Walk over the class hierarchy to find a match.
7860 NearLabel loop, success;
7861 __ Bind(&loop);
7862 if (cls.IsRegister()) {
7863 __ cmpl(out, cls.AsRegister<Register>());
7864 } else {
7865 DCHECK(cls.IsStackSlot()) << cls;
7866 __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7867 }
7868 __ j(kEqual, &success);
7869 // /* HeapReference<Class> */ out = out->super_class_
7870 GenerateReferenceLoadOneRegister(instruction,
7871 out_loc,
7872 super_offset,
7873 maybe_temp_loc,
7874 read_barrier_option);
7875 __ testl(out, out);
7876 __ j(kNotEqual, &loop);
7877 // If `out` is null, we use it for the result, and jump to `done`.
7878 __ jmp(&done);
7879 __ Bind(&success);
7880 __ movl(out, Immediate(1));
7881 if (zero.IsLinked()) {
7882 __ jmp(&done);
7883 }
7884 break;
7885 }
7886
7887 case TypeCheckKind::kArrayObjectCheck: {
7888 ReadBarrierOption read_barrier_option =
7889 codegen_->ReadBarrierOptionForInstanceOf(instruction);
7890 // /* HeapReference<Class> */ out = obj->klass_
7891 GenerateReferenceLoadTwoRegisters(instruction,
7892 out_loc,
7893 obj_loc,
7894 class_offset,
7895 read_barrier_option);
7896 // Do an exact check.
7897 NearLabel exact_check;
7898 if (cls.IsRegister()) {
7899 __ cmpl(out, cls.AsRegister<Register>());
7900 } else {
7901 DCHECK(cls.IsStackSlot()) << cls;
7902 __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7903 }
7904 __ j(kEqual, &exact_check);
7905 // Otherwise, we need to check that the object's class is a non-primitive array.
7906 // /* HeapReference<Class> */ out = out->component_type_
7907 GenerateReferenceLoadOneRegister(instruction,
7908 out_loc,
7909 component_offset,
7910 maybe_temp_loc,
7911 read_barrier_option);
7912 __ testl(out, out);
7913 // If `out` is null, we use it for the result, and jump to `done`.
7914 __ j(kEqual, &done);
7915 __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot));
7916 __ j(kNotEqual, &zero);
7917 __ Bind(&exact_check);
7918 __ movl(out, Immediate(1));
7919 __ jmp(&done);
7920 break;
7921 }
7922
7923 case TypeCheckKind::kArrayCheck: {
7924 // No read barrier since the slow path will retry upon failure.
7925 // /* HeapReference<Class> */ out = obj->klass_
7926 GenerateReferenceLoadTwoRegisters(instruction,
7927 out_loc,
7928 obj_loc,
7929 class_offset,
7930 kWithoutReadBarrier);
7931 if (cls.IsRegister()) {
7932 __ cmpl(out, cls.AsRegister<Register>());
7933 } else {
7934 DCHECK(cls.IsStackSlot()) << cls;
7935 __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7936 }
7937 DCHECK(locations->OnlyCallsOnSlowPath());
7938 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86(
7939 instruction, /* is_fatal= */ false);
7940 codegen_->AddSlowPath(slow_path);
7941 __ j(kNotEqual, slow_path->GetEntryLabel());
7942 __ movl(out, Immediate(1));
7943 if (zero.IsLinked()) {
7944 __ jmp(&done);
7945 }
7946 break;
7947 }
7948
7949 case TypeCheckKind::kInterfaceCheck: {
7950 if (codegen_->InstanceOfNeedsReadBarrier(instruction)) {
7951 DCHECK(locations->OnlyCallsOnSlowPath());
7952 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86(
7953 instruction, /* is_fatal= */ false);
7954 codegen_->AddSlowPath(slow_path);
7955 if (codegen_->EmitNonBakerReadBarrier()) {
7956 __ jmp(slow_path->GetEntryLabel());
7957 break;
7958 }
7959 // For Baker read barrier, take the slow path while marking.
7960 __ fs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86PointerSize>()),
7961 Immediate(0));
7962 __ j(kNotEqual, slow_path->GetEntryLabel());
7963 }
7964
7965 // Fast-path without read barriers.
7966 Register temp = maybe_temp_loc.AsRegister<Register>();
7967 // /* HeapReference<Class> */ temp = obj->klass_
7968 __ movl(temp, Address(obj, class_offset));
7969 __ MaybeUnpoisonHeapReference(temp);
7970 // /* HeapReference<Class> */ temp = temp->iftable_
7971 __ movl(temp, Address(temp, iftable_offset));
7972 __ MaybeUnpoisonHeapReference(temp);
7973 // Load the size of the `IfTable`. The `Class::iftable_` is never null.
7974 __ movl(out, Address(temp, array_length_offset));
7975 // Maybe poison the `cls` for direct comparison with memory.
7976 __ MaybePoisonHeapReference(cls.AsRegister<Register>());
7977 // Loop through the iftable and check if any class matches.
7978 NearLabel loop, end;
7979 __ Bind(&loop);
7980 // Check if we still have an entry to compare.
7981 __ subl(out, Immediate(2));
7982 __ j(kNegative, (zero.IsLinked() && !kPoisonHeapReferences) ? &zero : &end);
7983 // Go to next interface if the classes do not match.
7984 __ cmpl(cls.AsRegister<Register>(),
7985 CodeGeneratorX86::ArrayAddress(temp, out_loc, TIMES_4, object_array_data_offset));
7986 __ j(kNotEqual, &loop);
7987 if (zero.IsLinked()) {
7988 __ movl(out, Immediate(1));
7989 // If `cls` was poisoned above, unpoison it.
7990 __ MaybeUnpoisonHeapReference(cls.AsRegister<Register>());
7991 __ jmp(&done);
7992 if (kPoisonHeapReferences) {
7993 // The false case needs to unpoison the class before jumping to `zero`.
7994 __ Bind(&end);
7995 __ UnpoisonHeapReference(cls.AsRegister<Register>());
7996 __ jmp(&zero);
7997 }
7998 } else {
7999 // To reduce branching, use the fact that the false case branches with a `-2` in `out`.
8000 __ movl(out, Immediate(-1));
8001 __ Bind(&end);
8002 __ addl(out, Immediate(2));
8003 // If `cls` was poisoned above, unpoison it.
8004 __ MaybeUnpoisonHeapReference(cls.AsRegister<Register>());
8005 }
8006 break;
8007 }
8008
8009 case TypeCheckKind::kUnresolvedCheck: {
8010 // Note that we indeed only call on slow path, but we always go
8011 // into the slow path for the unresolved check case.
8012 //
8013 // We cannot directly call the InstanceofNonTrivial runtime
8014 // entry point without resorting to a type checking slow path
8015 // here (i.e. by calling InvokeRuntime directly), as it would
8016 // require to assign fixed registers for the inputs of this
8017 // HInstanceOf instruction (following the runtime calling
8018 // convention), which might be cluttered by the potential first
8019 // read barrier emission at the beginning of this method.
8020 //
8021 // TODO: Introduce a new runtime entry point taking the object
8022 // to test (instead of its class) as argument, and let it deal
8023 // with the read barrier issues. This will let us refactor this
8024 // case of the `switch` code as it was previously (with a direct
8025 // call to the runtime not using a type checking slow path).
8026 // This should also be beneficial for the other cases above.
8027 DCHECK(locations->OnlyCallsOnSlowPath());
8028 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86(
8029 instruction, /* is_fatal= */ false);
8030 codegen_->AddSlowPath(slow_path);
8031 __ jmp(slow_path->GetEntryLabel());
8032 break;
8033 }
8034
8035 case TypeCheckKind::kBitstringCheck: {
8036 // /* HeapReference<Class> */ temp = obj->klass_
8037 GenerateReferenceLoadTwoRegisters(instruction,
8038 out_loc,
8039 obj_loc,
8040 class_offset,
8041 kWithoutReadBarrier);
8042
8043 GenerateBitstringTypeCheckCompare(instruction, out);
8044 __ j(kNotEqual, &zero);
8045 __ movl(out, Immediate(1));
8046 __ jmp(&done);
8047 break;
8048 }
8049 }
8050
8051 if (zero.IsLinked()) {
8052 __ Bind(&zero);
8053 __ xorl(out, out);
8054 }
8055
8056 if (done.IsLinked()) {
8057 __ Bind(&done);
8058 }
8059
8060 if (slow_path != nullptr) {
8061 __ Bind(slow_path->GetExitLabel());
8062 }
8063 }
8064
8065 void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) {
8066 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
8067 LocationSummary::CallKind call_kind = codegen_->GetCheckCastCallKind(instruction);
8068 LocationSummary* locations =
8069 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
8070 locations->SetInAt(0, Location::RequiresRegister());
8071 if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
8072 // Require a register for the interface check since there is a loop that compares the class to
8073 // a memory address.
8074 locations->SetInAt(1, Location::RequiresRegister());
8075 } else if (type_check_kind == TypeCheckKind::kBitstringCheck) {
8076 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
8077 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
8078 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
8079 } else {
8080 locations->SetInAt(1, Location::Any());
8081 }
8082 locations->AddRegisterTemps(NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind));
8083 }
8084
8085 void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
8086 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
8087 LocationSummary* locations = instruction->GetLocations();
8088 Location obj_loc = locations->InAt(0);
8089 Register obj = obj_loc.AsRegister<Register>();
8090 Location cls = locations->InAt(1);
8091 Location temp_loc = locations->GetTemp(0);
8092 Register temp = temp_loc.AsRegister<Register>();
8093 const size_t num_temps = NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind);
8094 DCHECK_GE(num_temps, 1u);
8095 DCHECK_LE(num_temps, 2u);
8096 Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
8097 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
8098 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
8099 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
8100 const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
8101 const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
8102 const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
8103 const uint32_t object_array_data_offset =
8104 mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
8105
8106 bool is_type_check_slow_path_fatal = codegen_->IsTypeCheckSlowPathFatal(instruction);
8107 SlowPathCode* type_check_slow_path =
8108 new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86(
8109 instruction, is_type_check_slow_path_fatal);
8110 codegen_->AddSlowPath(type_check_slow_path);
8111
8112 NearLabel done;
8113 // Avoid null check if we know obj is not null.
8114 if (instruction->MustDoNullCheck()) {
8115 __ testl(obj, obj);
8116 __ j(kEqual, &done);
8117 }
8118
8119 switch (type_check_kind) {
8120 case TypeCheckKind::kExactCheck:
8121 case TypeCheckKind::kArrayCheck: {
8122 // /* HeapReference<Class> */ temp = obj->klass_
8123 GenerateReferenceLoadTwoRegisters(instruction,
8124 temp_loc,
8125 obj_loc,
8126 class_offset,
8127 kWithoutReadBarrier);
8128
8129 if (cls.IsRegister()) {
8130 __ cmpl(temp, cls.AsRegister<Register>());
8131 } else {
8132 DCHECK(cls.IsStackSlot()) << cls;
8133 __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
8134 }
8135 // Jump to slow path for throwing the exception or doing a
8136 // more involved array check.
8137 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
8138 break;
8139 }
8140
8141 case TypeCheckKind::kAbstractClassCheck: {
8142 // /* HeapReference<Class> */ temp = obj->klass_
8143 GenerateReferenceLoadTwoRegisters(instruction,
8144 temp_loc,
8145 obj_loc,
8146 class_offset,
8147 kWithoutReadBarrier);
8148
8149 // If the class is abstract, we eagerly fetch the super class of the
8150 // object to avoid doing a comparison we know will fail.
8151 NearLabel loop;
8152 __ Bind(&loop);
8153 // /* HeapReference<Class> */ temp = temp->super_class_
8154 GenerateReferenceLoadOneRegister(instruction,
8155 temp_loc,
8156 super_offset,
8157 maybe_temp2_loc,
8158 kWithoutReadBarrier);
8159
8160 // If the class reference currently in `temp` is null, jump to the slow path to throw the
8161 // exception.
8162 __ testl(temp, temp);
8163 __ j(kZero, type_check_slow_path->GetEntryLabel());
8164
8165 // Otherwise, compare the classes
8166 if (cls.IsRegister()) {
8167 __ cmpl(temp, cls.AsRegister<Register>());
8168 } else {
8169 DCHECK(cls.IsStackSlot()) << cls;
8170 __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
8171 }
8172 __ j(kNotEqual, &loop);
8173 break;
8174 }
8175
8176 case TypeCheckKind::kClassHierarchyCheck: {
8177 // /* HeapReference<Class> */ temp = obj->klass_
8178 GenerateReferenceLoadTwoRegisters(instruction,
8179 temp_loc,
8180 obj_loc,
8181 class_offset,
8182 kWithoutReadBarrier);
8183
8184 // Walk over the class hierarchy to find a match.
8185 NearLabel loop;
8186 __ Bind(&loop);
8187 if (cls.IsRegister()) {
8188 __ cmpl(temp, cls.AsRegister<Register>());
8189 } else {
8190 DCHECK(cls.IsStackSlot()) << cls;
8191 __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
8192 }
8193 __ j(kEqual, &done);
8194
8195 // /* HeapReference<Class> */ temp = temp->super_class_
8196 GenerateReferenceLoadOneRegister(instruction,
8197 temp_loc,
8198 super_offset,
8199 maybe_temp2_loc,
8200 kWithoutReadBarrier);
8201
8202 // If the class reference currently in `temp` is not null, jump
8203       // back to the beginning of the loop.
8204 __ testl(temp, temp);
8205 __ j(kNotZero, &loop);
8206       // Otherwise, jump to the slow path to throw the exception.
8207 __ jmp(type_check_slow_path->GetEntryLabel());
8208 break;
8209 }
8210
8211 case TypeCheckKind::kArrayObjectCheck: {
8212 // /* HeapReference<Class> */ temp = obj->klass_
8213 GenerateReferenceLoadTwoRegisters(instruction,
8214 temp_loc,
8215 obj_loc,
8216 class_offset,
8217 kWithoutReadBarrier);
8218
8219 // Do an exact check.
8220 if (cls.IsRegister()) {
8221 __ cmpl(temp, cls.AsRegister<Register>());
8222 } else {
8223 DCHECK(cls.IsStackSlot()) << cls;
8224 __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
8225 }
8226 __ j(kEqual, &done);
8227
8228 // Otherwise, we need to check that the object's class is a non-primitive array.
8229 // /* HeapReference<Class> */ temp = temp->component_type_
8230 GenerateReferenceLoadOneRegister(instruction,
8231 temp_loc,
8232 component_offset,
8233 maybe_temp2_loc,
8234 kWithoutReadBarrier);
8235
8236       // If the component type is null (i.e. the object is not an array), jump to the slow path to
8237 // throw the exception. Otherwise proceed with the check.
8238 __ testl(temp, temp);
8239 __ j(kZero, type_check_slow_path->GetEntryLabel());
8240
8241 __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
8242 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
8243 break;
8244 }
8245
8246 case TypeCheckKind::kUnresolvedCheck:
8247 // We always go into the type check slow path for the unresolved check case.
8248 // We cannot directly call the CheckCast runtime entry point
8249 // without resorting to a type checking slow path here (i.e. by
8250       // calling InvokeRuntime directly), as it would require
8251       // assigning fixed registers to the inputs of this HInstanceOf
8252 // instruction (following the runtime calling convention), which
8253 // might be cluttered by the potential first read barrier
8254 // emission at the beginning of this method.
8255 __ jmp(type_check_slow_path->GetEntryLabel());
8256 break;
8257
8258 case TypeCheckKind::kInterfaceCheck: {
8259 // Fast path for the interface check. Try to avoid read barriers to improve the fast path.
8260       // We cannot get false positives by doing this.
8261 // /* HeapReference<Class> */ temp = obj->klass_
8262 GenerateReferenceLoadTwoRegisters(instruction,
8263 temp_loc,
8264 obj_loc,
8265 class_offset,
8266 kWithoutReadBarrier);
8267
8268 // /* HeapReference<Class> */ temp = temp->iftable_
8269 GenerateReferenceLoadOneRegister(instruction,
8270 temp_loc,
8271 iftable_offset,
8272 maybe_temp2_loc,
8273 kWithoutReadBarrier);
8274 // Load the size of the `IfTable`. The `Class::iftable_` is never null.
8275 __ movl(maybe_temp2_loc.AsRegister<Register>(), Address(temp, array_length_offset));
8276 // Maybe poison the `cls` for direct comparison with memory.
8277 __ MaybePoisonHeapReference(cls.AsRegister<Register>());
8278 // Loop through the iftable and check if any class matches.
8279 NearLabel start_loop;
8280 __ Bind(&start_loop);
8281 // Check if we still have an entry to compare.
8282 __ subl(maybe_temp2_loc.AsRegister<Register>(), Immediate(2));
8283 __ j(kNegative, type_check_slow_path->GetEntryLabel());
8284 // Go to next interface if the classes do not match.
8285 __ cmpl(cls.AsRegister<Register>(),
8286 CodeGeneratorX86::ArrayAddress(temp,
8287 maybe_temp2_loc,
8288 TIMES_4,
8289 object_array_data_offset));
8290 __ j(kNotEqual, &start_loop);
8291 // If `cls` was poisoned above, unpoison it.
8292 __ MaybeUnpoisonHeapReference(cls.AsRegister<Register>());
8293 break;
8294 }
8295
8296 case TypeCheckKind::kBitstringCheck: {
8297 // /* HeapReference<Class> */ temp = obj->klass_
8298 GenerateReferenceLoadTwoRegisters(instruction,
8299 temp_loc,
8300 obj_loc,
8301 class_offset,
8302 kWithoutReadBarrier);
8303
8304 GenerateBitstringTypeCheckCompare(instruction, temp);
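      // Any mismatch in the bitstring means the cast fails; the slow path throws.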
8305 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
8306 break;
8307 }
8308 }
8309 __ Bind(&done);
8310
8311 __ Bind(type_check_slow_path->GetExitLabel());
8312 }
8313
8314 void LocationsBuilderX86::VisitMonitorOperation(HMonitorOperation* instruction) {
8315 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
8316 instruction, LocationSummary::kCallOnMainOnly);
8317 InvokeRuntimeCallingConvention calling_convention;
8318 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
8319 }
8320
8321 void InstructionCodeGeneratorX86::VisitMonitorOperation(HMonitorOperation* instruction) {
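  // Lock and unlock are plain runtime calls; the object was placed in the first
  // runtime calling convention register by the locations above.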
8322 codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject
8323 : kQuickUnlockObject,
8324 instruction,
8325 instruction->GetDexPc());
8326 if (instruction->IsEnter()) {
8327 CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
8328 } else {
8329 CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
8330 }
8331 }
8332
8333 void LocationsBuilderX86::VisitX86AndNot(HX86AndNot* instruction) {
8334 DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
8335 DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType();
8336 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
8337 locations->SetInAt(0, Location::RequiresRegister());
8338 locations->SetInAt(1, Location::RequiresRegister());
8339 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
8340 }
8341
8342 void InstructionCodeGeneratorX86::VisitX86AndNot(HX86AndNot* instruction) {
8343 LocationSummary* locations = instruction->GetLocations();
8344 Location first = locations->InAt(0);
8345 Location second = locations->InAt(1);
8346 Location dest = locations->Out();
8347 if (instruction->GetResultType() == DataType::Type::kInt32) {
8348 __ andn(dest.AsRegister<Register>(),
8349 first.AsRegister<Register>(),
8350 second.AsRegister<Register>());
8351 } else {
8352 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
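    // For 64-bit values, apply andn to the low and high halves of the register pairs separately.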
8353 __ andn(dest.AsRegisterPairLow<Register>(),
8354 first.AsRegisterPairLow<Register>(),
8355 second.AsRegisterPairLow<Register>());
8356 __ andn(dest.AsRegisterPairHigh<Register>(),
8357 first.AsRegisterPairHigh<Register>(),
8358 second.AsRegisterPairHigh<Register>());
8359 }
8360 }
8361
8362 void LocationsBuilderX86::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) {
8363 DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
8364 DCHECK(instruction->GetType() == DataType::Type::kInt32) << instruction->GetType();
8365 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
8366 locations->SetInAt(0, Location::RequiresRegister());
8367 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
8368 }
8369
8370 void InstructionCodeGeneratorX86::VisitX86MaskOrResetLeastSetBit(
8371 HX86MaskOrResetLeastSetBit* instruction) {
8372 LocationSummary* locations = instruction->GetLocations();
8373 Location src = locations->InAt(0);
8374 Location dest = locations->Out();
8375 DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
8376 switch (instruction->GetOpKind()) {
8377 case HInstruction::kAnd:
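      // blsr computes dest = src & (src - 1), i.e. it resets the lowest set bit.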
8378 __ blsr(dest.AsRegister<Register>(), src.AsRegister<Register>());
8379 break;
8380 case HInstruction::kXor:
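      // blsmsk computes dest = src ^ (src - 1), i.e. a mask up to and including the lowest set bit.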
8381 __ blsmsk(dest.AsRegister<Register>(), src.AsRegister<Register>());
8382 break;
8383 default:
8384 LOG(FATAL) << "Unreachable";
8385 }
8386 }
8387
8388 void LocationsBuilderX86::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
8389 void LocationsBuilderX86::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); }
8390 void LocationsBuilderX86::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); }
8391
8392 void LocationsBuilderX86::HandleBitwiseOperation(HBinaryOperation* instruction) {
8393 LocationSummary* locations =
8394 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
8395 DCHECK(instruction->GetResultType() == DataType::Type::kInt32
8396 || instruction->GetResultType() == DataType::Type::kInt64);
8397 locations->SetInAt(0, Location::RequiresRegister());
8398 locations->SetInAt(1, Location::Any());
8399 locations->SetOut(Location::SameAsFirstInput());
8400 }
8401
8402 void InstructionCodeGeneratorX86::VisitAnd(HAnd* instruction) {
8403 HandleBitwiseOperation(instruction);
8404 }
8405
8406 void InstructionCodeGeneratorX86::VisitOr(HOr* instruction) {
8407 HandleBitwiseOperation(instruction);
8408 }
8409
8410 void InstructionCodeGeneratorX86::VisitXor(HXor* instruction) {
8411 HandleBitwiseOperation(instruction);
8412 }
8413
8414 void InstructionCodeGeneratorX86::HandleBitwiseOperation(HBinaryOperation* instruction) {
8415 LocationSummary* locations = instruction->GetLocations();
8416 Location first = locations->InAt(0);
8417 Location second = locations->InAt(1);
8418 DCHECK(first.Equals(locations->Out()));
8419
8420 if (instruction->GetResultType() == DataType::Type::kInt32) {
8421 if (second.IsRegister()) {
8422 if (instruction->IsAnd()) {
8423 __ andl(first.AsRegister<Register>(), second.AsRegister<Register>());
8424 } else if (instruction->IsOr()) {
8425 __ orl(first.AsRegister<Register>(), second.AsRegister<Register>());
8426 } else {
8427 DCHECK(instruction->IsXor());
8428 __ xorl(first.AsRegister<Register>(), second.AsRegister<Register>());
8429 }
8430 } else if (second.IsConstant()) {
8431 if (instruction->IsAnd()) {
8432 __ andl(first.AsRegister<Register>(),
8433 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
8434 } else if (instruction->IsOr()) {
8435 __ orl(first.AsRegister<Register>(),
8436 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
8437 } else {
8438 DCHECK(instruction->IsXor());
8439 __ xorl(first.AsRegister<Register>(),
8440 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
8441 }
8442 } else {
8443 if (instruction->IsAnd()) {
8444 __ andl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
8445 } else if (instruction->IsOr()) {
8446 __ orl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
8447 } else {
8448 DCHECK(instruction->IsXor());
8449 __ xorl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
8450 }
8451 }
8452 } else {
8453 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
8454 if (second.IsRegisterPair()) {
8455 if (instruction->IsAnd()) {
8456 __ andl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
8457 __ andl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
8458 } else if (instruction->IsOr()) {
8459 __ orl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
8460 __ orl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
8461 } else {
8462 DCHECK(instruction->IsXor());
8463 __ xorl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
8464 __ xorl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
8465 }
8466 } else if (second.IsDoubleStackSlot()) {
8467 if (instruction->IsAnd()) {
8468 __ andl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
8469 __ andl(first.AsRegisterPairHigh<Register>(),
8470 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
8471 } else if (instruction->IsOr()) {
8472 __ orl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
8473 __ orl(first.AsRegisterPairHigh<Register>(),
8474 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
8475 } else {
8476 DCHECK(instruction->IsXor());
8477 __ xorl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
8478 __ xorl(first.AsRegisterPairHigh<Register>(),
8479 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
8480 }
8481 } else {
8482 DCHECK(second.IsConstant()) << second;
8483 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
8484 int32_t low_value = Low32Bits(value);
8485 int32_t high_value = High32Bits(value);
8486 Immediate low(low_value);
8487 Immediate high(high_value);
8488 Register first_low = first.AsRegisterPairLow<Register>();
8489 Register first_high = first.AsRegisterPairHigh<Register>();
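      // Emit only the halves that change the value: AND with all ones and OR/XOR with zero
      // are no-ops, and AND with zero is emitted as the shorter xor-with-self.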
8490 if (instruction->IsAnd()) {
8491 if (low_value == 0) {
8492 __ xorl(first_low, first_low);
8493 } else if (low_value != -1) {
8494 __ andl(first_low, low);
8495 }
8496 if (high_value == 0) {
8497 __ xorl(first_high, first_high);
8498 } else if (high_value != -1) {
8499 __ andl(first_high, high);
8500 }
8501 } else if (instruction->IsOr()) {
8502 if (low_value != 0) {
8503 __ orl(first_low, low);
8504 }
8505 if (high_value != 0) {
8506 __ orl(first_high, high);
8507 }
8508 } else {
8509 DCHECK(instruction->IsXor());
8510 if (low_value != 0) {
8511 __ xorl(first_low, low);
8512 }
8513 if (high_value != 0) {
8514 __ xorl(first_high, high);
8515 }
8516 }
8517 }
8518 }
8519 }
8520
8521 void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister(
8522 HInstruction* instruction,
8523 Location out,
8524 uint32_t offset,
8525 Location maybe_temp,
8526 ReadBarrierOption read_barrier_option) {
8527 Register out_reg = out.AsRegister<Register>();
8528 if (read_barrier_option == kWithReadBarrier) {
8529 DCHECK(codegen_->EmitReadBarrier());
8530 if (kUseBakerReadBarrier) {
8531 // Load with fast path based Baker's read barrier.
8532 // /* HeapReference<Object> */ out = *(out + offset)
8533 codegen_->GenerateFieldLoadWithBakerReadBarrier(
8534 instruction, out, out_reg, offset, /* needs_null_check= */ false);
8535 } else {
8536 // Load with slow path based read barrier.
8537 // Save the value of `out` into `maybe_temp` before overwriting it
8538 // in the following move operation, as we will need it for the
8539 // read barrier below.
8540 DCHECK(maybe_temp.IsRegister()) << maybe_temp;
8541 __ movl(maybe_temp.AsRegister<Register>(), out_reg);
8542 // /* HeapReference<Object> */ out = *(out + offset)
8543 __ movl(out_reg, Address(out_reg, offset));
8544 codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
8545 }
8546 } else {
8547 // Plain load with no read barrier.
8548 // /* HeapReference<Object> */ out = *(out + offset)
8549 __ movl(out_reg, Address(out_reg, offset));
8550 __ MaybeUnpoisonHeapReference(out_reg);
8551 }
8552 }
8553
8554 void InstructionCodeGeneratorX86::GenerateReferenceLoadTwoRegisters(
8555 HInstruction* instruction,
8556 Location out,
8557 Location obj,
8558 uint32_t offset,
8559 ReadBarrierOption read_barrier_option) {
8560 Register out_reg = out.AsRegister<Register>();
8561 Register obj_reg = obj.AsRegister<Register>();
8562 if (read_barrier_option == kWithReadBarrier) {
8563 DCHECK(codegen_->EmitReadBarrier());
8564 if (kUseBakerReadBarrier) {
8565 // Load with fast path based Baker's read barrier.
8566 // /* HeapReference<Object> */ out = *(obj + offset)
8567 codegen_->GenerateFieldLoadWithBakerReadBarrier(
8568 instruction, out, obj_reg, offset, /* needs_null_check= */ false);
8569 } else {
8570 // Load with slow path based read barrier.
8571 // /* HeapReference<Object> */ out = *(obj + offset)
8572 __ movl(out_reg, Address(obj_reg, offset));
8573 codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
8574 }
8575 } else {
8576 // Plain load with no read barrier.
8577 // /* HeapReference<Object> */ out = *(obj + offset)
8578 __ movl(out_reg, Address(obj_reg, offset));
8579 __ MaybeUnpoisonHeapReference(out_reg);
8580 }
8581 }
8582
8583 void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(
8584 HInstruction* instruction,
8585 Location root,
8586 const Address& address,
8587 Label* fixup_label,
8588 ReadBarrierOption read_barrier_option) {
8589 Register root_reg = root.AsRegister<Register>();
8590 if (read_barrier_option == kWithReadBarrier) {
8591 DCHECK(codegen_->EmitReadBarrier());
8592 if (kUseBakerReadBarrier) {
8593 // Fast path implementation of art::ReadBarrier::BarrierForRoot when
8594       // Baker's read barriers are used:
8595 //
8596 // root = obj.field;
8597 // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
8598 // if (temp != null) {
8599 // root = temp(root)
8600 // }
8601
8602 // /* GcRoot<mirror::Object> */ root = *address
8603 __ movl(root_reg, address);
8604 if (fixup_label != nullptr) {
8605 __ Bind(fixup_label);
8606 }
8607 static_assert(
8608 sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
8609 "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
8610 "have different sizes.");
8611 static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
8612 "art::mirror::CompressedReference<mirror::Object> and int32_t "
8613 "have different sizes.");
8614
8615 // Slow path marking the GC root `root`.
8616 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathX86(
8617 instruction, root, /* unpoison_ref_before_marking= */ false);
8618 codegen_->AddSlowPath(slow_path);
8619
8620 // Test the entrypoint (`Thread::Current()->pReadBarrierMarkReg ## root.reg()`).
8621 const int32_t entry_point_offset =
8622 Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(root.reg());
8623 __ fs()->cmpl(Address::Absolute(entry_point_offset), Immediate(0));
8624 // The entrypoint is null when the GC is not marking.
8625 __ j(kNotEqual, slow_path->GetEntryLabel());
8626 __ Bind(slow_path->GetExitLabel());
8627 } else {
8628 // GC root loaded through a slow path for read barriers other
8629 // than Baker's.
8630 // /* GcRoot<mirror::Object>* */ root = address
8631 __ leal(root_reg, address);
8632 if (fixup_label != nullptr) {
8633 __ Bind(fixup_label);
8634 }
8635 // /* mirror::Object* */ root = root->Read()
8636 codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
8637 }
8638 } else {
8639 // Plain GC root load with no read barrier.
8640 // /* GcRoot<mirror::Object> */ root = *address
8641 __ movl(root_reg, address);
8642 if (fixup_label != nullptr) {
8643 __ Bind(fixup_label);
8644 }
8645 // Note that GC roots are not affected by heap poisoning, thus we
8646 // do not have to unpoison `root_reg` here.
8647 }
8648 }
8649
8650 void CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
8651 Location ref,
8652 Register obj,
8653 uint32_t offset,
8654 bool needs_null_check) {
8655 DCHECK(EmitBakerReadBarrier());
8656
8657 // /* HeapReference<Object> */ ref = *(obj + offset)
8658 Address src(obj, offset);
8659 GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
8660 }
8661
8662 void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
8663 Location ref,
8664 Register obj,
8665 uint32_t data_offset,
8666 Location index,
8667 bool needs_null_check) {
8668 DCHECK(EmitBakerReadBarrier());
8669
8670 static_assert(
8671 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
8672 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
8673 // /* HeapReference<Object> */ ref =
8674 // *(obj + data_offset + index * sizeof(HeapReference<Object>))
8675 Address src = CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset);
8676 GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
8677 }
8678
8679 void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
8680 Location ref,
8681 Register obj,
8682 const Address& src,
8683 bool needs_null_check,
8684 bool always_update_field,
8685 Register* temp) {
8686 DCHECK(EmitBakerReadBarrier());
8687
8688 // In slow path based read barriers, the read barrier call is
8689 // inserted after the original load. However, in fast path based
8690 // Baker's read barriers, we need to perform the load of
8691 // mirror::Object::monitor_ *before* the original reference load.
8692 // This load-load ordering is required by the read barrier.
8693 // The fast path/slow path (for Baker's algorithm) should look like:
8694 //
8695 // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
8696 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
8697 // HeapReference<Object> ref = *src; // Original reference load.
8698 // bool is_gray = (rb_state == ReadBarrier::GrayState());
8699 // if (is_gray) {
8700 // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path.
8701 // }
8702 //
8703 // Note: the original implementation in ReadBarrier::Barrier is
8704 // slightly more complex as:
8705 // - it implements the load-load fence using a data dependency on
8706 // the high-bits of rb_state, which are expected to be all zeroes
8707 // (we use CodeGeneratorX86::GenerateMemoryBarrier instead here,
8708 // which is a no-op thanks to the x86 memory model);
8709 // - it performs additional checks that we do not do here for
8710 // performance reasons.
8711
8712 Register ref_reg = ref.AsRegister<Register>();
8713 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
8714
8715 // Given the numeric representation, it's enough to check the low bit of the rb_state.
8716 static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
8717 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
8718 constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
8719 constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
8720 constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
8721
8722 // if (rb_state == ReadBarrier::GrayState())
8723 // ref = ReadBarrier::Mark(ref);
8724 // At this point, just do the "if" and make sure that flags are preserved until the branch.
8725 __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
8726 if (needs_null_check) {
8727 MaybeRecordImplicitNullCheck(instruction);
8728 }
8729
8730 // Load fence to prevent load-load reordering.
8731 // Note that this is a no-op, thanks to the x86 memory model.
8732 GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
8733
8734 // The actual reference load.
8735 // /* HeapReference<Object> */ ref = *src
8736 __ movl(ref_reg, src); // Flags are unaffected.
8737
8738 // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch.
8739 // Slow path marking the object `ref` when it is gray.
8740 SlowPathCode* slow_path;
8741 if (always_update_field) {
8742 DCHECK(temp != nullptr);
8743 slow_path = new (GetScopedAllocator()) ReadBarrierMarkAndUpdateFieldSlowPathX86(
8744 instruction, ref, obj, src, /* unpoison_ref_before_marking= */ true, *temp);
8745 } else {
8746 slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathX86(
8747 instruction, ref, /* unpoison_ref_before_marking= */ true);
8748 }
8749 AddSlowPath(slow_path);
8750
8751 // We have done the "if" of the gray bit check above, now branch based on the flags.
8752 __ j(kNotZero, slow_path->GetEntryLabel());
8753
8754 // Object* ref = ref_addr->AsMirrorPtr()
8755 __ MaybeUnpoisonHeapReference(ref_reg);
8756
8757 __ Bind(slow_path->GetExitLabel());
8758 }
8759
8760 void CodeGeneratorX86::GenerateReadBarrierSlow(HInstruction* instruction,
8761 Location out,
8762 Location ref,
8763 Location obj,
8764 uint32_t offset,
8765 Location index) {
8766 DCHECK(EmitReadBarrier());
8767
8768 // Insert a slow path based read barrier *after* the reference load.
8769 //
8770 // If heap poisoning is enabled, the unpoisoning of the loaded
8771 // reference will be carried out by the runtime within the slow
8772 // path.
8773 //
8774 // Note that `ref` currently does not get unpoisoned (when heap
8775 // poisoning is enabled), which is alright as the `ref` argument is
8776 // not used by the artReadBarrierSlow entry point.
8777 //
8778 // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
8779 SlowPathCode* slow_path = new (GetScopedAllocator())
8780 ReadBarrierForHeapReferenceSlowPathX86(instruction, out, ref, obj, offset, index);
8781 AddSlowPath(slow_path);
8782
8783 __ jmp(slow_path->GetEntryLabel());
8784 __ Bind(slow_path->GetExitLabel());
8785 }
8786
8787 void CodeGeneratorX86::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
8788 Location out,
8789 Location ref,
8790 Location obj,
8791 uint32_t offset,
8792 Location index) {
8793 if (EmitReadBarrier()) {
8794 // Baker's read barriers shall be handled by the fast path
8795 // (CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier).
8796 DCHECK(!kUseBakerReadBarrier);
8797 // If heap poisoning is enabled, unpoisoning will be taken care of
8798 // by the runtime within the slow path.
8799 GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
8800 } else if (kPoisonHeapReferences) {
8801 __ UnpoisonHeapReference(out.AsRegister<Register>());
8802 }
8803 }
8804
8805 void CodeGeneratorX86::GenerateReadBarrierForRootSlow(HInstruction* instruction,
8806 Location out,
8807 Location root) {
8808 DCHECK(EmitReadBarrier());
8809
8810 // Insert a slow path based read barrier *after* the GC root load.
8811 //
8812 // Note that GC roots are not affected by heap poisoning, so we do
8813 // not need to do anything special for this here.
8814 SlowPathCode* slow_path =
8815 new (GetScopedAllocator()) ReadBarrierForRootSlowPathX86(instruction, out, root);
8816 AddSlowPath(slow_path);
8817
8818 __ jmp(slow_path->GetEntryLabel());
8819 __ Bind(slow_path->GetExitLabel());
8820 }
8821
8822 void LocationsBuilderX86::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
8823 // Nothing to do, this should be removed during prepare for register allocator.
8824 LOG(FATAL) << "Unreachable";
8825 }
8826
8827 void InstructionCodeGeneratorX86::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
8828 // Nothing to do, this should be removed during prepare for register allocator.
8829 LOG(FATAL) << "Unreachable";
8830 }
8831
8832 // Simple implementation of packed switch - generate cascaded compare/jumps.
8833 void LocationsBuilderX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
8834 LocationSummary* locations =
8835 new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
8836 locations->SetInAt(0, Location::RequiresRegister());
8837 }
8838
8839 void InstructionCodeGeneratorX86::GenPackedSwitchWithCompares(Register value_reg,
8840 int32_t lower_bound,
8841 uint32_t num_entries,
8842 HBasicBlock* switch_block,
8843 HBasicBlock* default_block) {
8844 // Figure out the correct compare values and jump conditions.
8845 // Handle the first compare/branch as a special case because it might
8846 // jump to the default case.
8847 DCHECK_GT(num_entries, 2u);
8848 Condition first_condition;
8849 uint32_t index;
8850 const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors();
8851 if (lower_bound != 0) {
8852 first_condition = kLess;
8853 __ cmpl(value_reg, Immediate(lower_bound));
8854 __ j(first_condition, codegen_->GetLabelOf(default_block));
8855 __ j(kEqual, codegen_->GetLabelOf(successors[0]));
8856
8857 index = 1;
8858 } else {
8859 // Handle all the compare/jumps below.
8860 first_condition = kBelow;
8861 index = 0;
8862 }
8863
8864 // Handle the rest of the compare/jumps.
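  // Each iteration covers two table entries: a `first_condition` branch to successors[index]
  // and an equality branch to successors[index + 1].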
8865 for (; index + 1 < num_entries; index += 2) {
8866 int32_t compare_to_value = lower_bound + index + 1;
8867 __ cmpl(value_reg, Immediate(compare_to_value));
8868 // Jump to successors[index] if value < case_value[index].
8869 __ j(first_condition, codegen_->GetLabelOf(successors[index]));
8870 // Jump to successors[index + 1] if value == case_value[index + 1].
8871 __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
8872 }
8873
8874 if (index != num_entries) {
8875 // There are an odd number of entries. Handle the last one.
8876 DCHECK_EQ(index + 1, num_entries);
8877 __ cmpl(value_reg, Immediate(lower_bound + index));
8878 __ j(kEqual, codegen_->GetLabelOf(successors[index]));
8879 }
8880
8881 // And the default for any other value.
8882 if (!codegen_->GoesToNextBlock(switch_block, default_block)) {
8883 __ jmp(codegen_->GetLabelOf(default_block));
8884 }
8885 }
8886
8887 void InstructionCodeGeneratorX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
8888 int32_t lower_bound = switch_instr->GetStartValue();
8889 uint32_t num_entries = switch_instr->GetNumEntries();
8890 LocationSummary* locations = switch_instr->GetLocations();
8891 Register value_reg = locations->InAt(0).AsRegister<Register>();
8892
8893 GenPackedSwitchWithCompares(value_reg,
8894 lower_bound,
8895 num_entries,
8896 switch_instr->GetBlock(),
8897 switch_instr->GetDefaultBlock());
8898 }
8899
8900 void LocationsBuilderX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
8901 LocationSummary* locations =
8902 new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
8903 locations->SetInAt(0, Location::RequiresRegister());
8904
8905 // Constant area pointer.
8906 locations->SetInAt(1, Location::RequiresRegister());
8907
8908 // And the temporary we need.
8909 locations->AddTemp(Location::RequiresRegister());
8910 }
8911
8912 void InstructionCodeGeneratorX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
8913 int32_t lower_bound = switch_instr->GetStartValue();
8914 uint32_t num_entries = switch_instr->GetNumEntries();
8915 LocationSummary* locations = switch_instr->GetLocations();
8916 Register value_reg = locations->InAt(0).AsRegister<Register>();
8917 HBasicBlock* default_block = switch_instr->GetDefaultBlock();
8918
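  // Small switches are emitted as a cascade of compares; only larger ones get a jump table
  // in the constant area.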
8919 if (num_entries <= kPackedSwitchJumpTableThreshold) {
8920 GenPackedSwitchWithCompares(value_reg,
8921 lower_bound,
8922 num_entries,
8923 switch_instr->GetBlock(),
8924 default_block);
8925 return;
8926 }
8927
8928 // Optimizing has a jump area.
8929 Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
8930 Register constant_area = locations->InAt(1).AsRegister<Register>();
8931
8932 // Remove the bias, if needed.
8933 if (lower_bound != 0) {
8934 __ leal(temp_reg, Address(value_reg, -lower_bound));
8935 value_reg = temp_reg;
8936 }
8937
8938 // Is the value in range?
8939 DCHECK_GE(num_entries, 1u);
8940 __ cmpl(value_reg, Immediate(num_entries - 1));
8941 __ j(kAbove, codegen_->GetLabelOf(default_block));
8942
8943 // We are in the range of the table.
8944 // Load (target-constant_area) from the jump table, indexing by the value.
8945 __ movl(temp_reg, codegen_->LiteralCaseTable(switch_instr, constant_area, value_reg));
8946
8947 // Compute the actual target address by adding in constant_area.
8948 __ addl(temp_reg, constant_area);
8949
8950 // And jump.
8951 __ jmp(temp_reg);
8952 }
8953
8954 void LocationsBuilderX86::VisitX86ComputeBaseMethodAddress(
8955 HX86ComputeBaseMethodAddress* insn) {
8956 LocationSummary* locations =
8957 new (GetGraph()->GetAllocator()) LocationSummary(insn, LocationSummary::kNoCall);
8958 locations->SetOut(Location::RequiresRegister());
8959 }
8960
8961 void InstructionCodeGeneratorX86::VisitX86ComputeBaseMethodAddress(
8962 HX86ComputeBaseMethodAddress* insn) {
8963 LocationSummary* locations = insn->GetLocations();
8964 Register reg = locations->Out().AsRegister<Register>();
8965
8966 // Generate call to next instruction.
8967 Label next_instruction;
8968 __ call(&next_instruction);
8969 __ Bind(&next_instruction);
8970
8971 // Remember this offset for later use with constant area.
8972 codegen_->AddMethodAddressOffset(insn, GetAssembler()->CodeSize());
8973
8974 // Grab the return address off the stack.
8975 __ popl(reg);
8976 }
8977
8978 void LocationsBuilderX86::VisitX86LoadFromConstantTable(
8979 HX86LoadFromConstantTable* insn) {
8980 LocationSummary* locations =
8981 new (GetGraph()->GetAllocator()) LocationSummary(insn, LocationSummary::kNoCall);
8982
8983 locations->SetInAt(0, Location::RequiresRegister());
8984 locations->SetInAt(1, Location::ConstantLocation(insn->GetConstant()));
8985
8986 // If we don't need to be materialized, we only need the inputs to be set.
8987 if (insn->IsEmittedAtUseSite()) {
8988 return;
8989 }
8990
8991 switch (insn->GetType()) {
8992 case DataType::Type::kFloat32:
8993 case DataType::Type::kFloat64:
8994 locations->SetOut(Location::RequiresFpuRegister());
8995 break;
8996
8997 case DataType::Type::kInt32:
8998 locations->SetOut(Location::RequiresRegister());
8999 break;
9000
9001 default:
9002 LOG(FATAL) << "Unsupported x86 constant area type " << insn->GetType();
9003 }
9004 }
9005
9006 void InstructionCodeGeneratorX86::VisitX86LoadFromConstantTable(HX86LoadFromConstantTable* insn) {
9007 if (insn->IsEmittedAtUseSite()) {
9008 return;
9009 }
9010
9011 LocationSummary* locations = insn->GetLocations();
9012 Location out = locations->Out();
9013 Register const_area = locations->InAt(0).AsRegister<Register>();
9014 HConstant *value = insn->GetConstant();
9015
9016 switch (insn->GetType()) {
9017 case DataType::Type::kFloat32:
9018 __ movss(out.AsFpuRegister<XmmRegister>(),
9019 codegen_->LiteralFloatAddress(
9020 value->AsFloatConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
9021 break;
9022
9023 case DataType::Type::kFloat64:
9024 __ movsd(out.AsFpuRegister<XmmRegister>(),
9025 codegen_->LiteralDoubleAddress(
9026 value->AsDoubleConstant()->GetValue(),
9027 insn->GetBaseMethodAddress(),
9028 const_area));
9029 break;
9030
9031 case DataType::Type::kInt32:
9032 __ movl(out.AsRegister<Register>(),
9033 codegen_->LiteralInt32Address(
9034 value->AsIntConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
9035 break;
9036
9037 default:
9038 LOG(FATAL) << "Unsupported x86 constant area type " << insn->GetType();
9039 }
9040 }
9041
9042 /**
9043 * Class to handle late fixup of offsets into constant area.
9044 */
9045 class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
9046 public:
9047   RIPFixup(CodeGeneratorX86& codegen,
9048 HX86ComputeBaseMethodAddress* base_method_address,
9049 size_t offset)
9050 : codegen_(&codegen),
9051 base_method_address_(base_method_address),
9052 offset_into_constant_area_(offset) {}
9053
9054 protected:
9055   void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }
9056
9057 CodeGeneratorX86* codegen_;
9058 HX86ComputeBaseMethodAddress* base_method_address_;
9059
9060 private:
9061   void Process(const MemoryRegion& region, int pos) override {
9062 // Patch the correct offset for the instruction. The place to patch is the
9063 // last 4 bytes of the instruction.
9064 // The value to patch is the distance from the offset in the constant area
9065 // from the address computed by the HX86ComputeBaseMethodAddress instruction.
9066 int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
9067 int32_t relative_position =
9068 constant_offset - codegen_->GetMethodAddressOffset(base_method_address_);
9069
9070 // Patch in the right value.
9071 region.StoreUnaligned<int32_t>(pos - 4, relative_position);
9072 }
9073
9074 // Location in constant area that the fixup refers to.
9075 int32_t offset_into_constant_area_;
9076 };
9077
9078 /**
9079 * Class to handle late fixup of offsets to a jump table that will be created in the
9080 * constant area.
9081 */
9082 class JumpTableRIPFixup : public RIPFixup {
9083 public:
9084   JumpTableRIPFixup(CodeGeneratorX86& codegen, HX86PackedSwitch* switch_instr)
9085 : RIPFixup(codegen, switch_instr->GetBaseMethodAddress(), static_cast<size_t>(-1)),
9086 switch_instr_(switch_instr) {}
9087
9088   void CreateJumpTable() {
9089 X86Assembler* assembler = codegen_->GetAssembler();
9090
9091 // Ensure that the reference to the jump table has the correct offset.
9092 const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
9093 SetOffset(offset_in_constant_table);
9094
9095 // The label values in the jump table are computed relative to the
9096 // instruction addressing the constant area.
9097 const int32_t relative_offset = codegen_->GetMethodAddressOffset(base_method_address_);
9098
9099     // Populate the jump table with the correct values.
9100 int32_t num_entries = switch_instr_->GetNumEntries();
9101 HBasicBlock* block = switch_instr_->GetBlock();
9102 const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
9103 // The value that we want is the target offset - the position of the table.
9104 for (int32_t i = 0; i < num_entries; i++) {
9105 HBasicBlock* b = successors[i];
9106 Label* l = codegen_->GetLabelOf(b);
9107 DCHECK(l->IsBound());
9108 int32_t offset_to_block = l->Position() - relative_offset;
9109 assembler->AppendInt32(offset_to_block);
9110 }
9111 }
9112
9113 private:
9114 const HX86PackedSwitch* switch_instr_;
9115 };
9116
9117 void CodeGeneratorX86::Finalize() {
9118 // Generate the constant area if needed.
9119 X86Assembler* assembler = GetAssembler();
9120
9121 if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
9122 // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8
9123 // byte values.
9124 assembler->Align(4, 0);
9125 constant_area_start_ = assembler->CodeSize();
9126
9127 // Populate any jump tables.
9128 for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) {
9129 jump_table->CreateJumpTable();
9130 }
9131
9132 // And now add the constant area to the generated code.
9133 assembler->AddConstantArea();
9134 }
9135
9136 // And finish up.
9137 CodeGenerator::Finalize();
9138 }
9139
9140 Address CodeGeneratorX86::LiteralDoubleAddress(double v,
9141 HX86ComputeBaseMethodAddress* method_base,
9142 Register reg) {
9143 AssemblerFixup* fixup =
9144 new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddDouble(v));
9145 return Address(reg, kPlaceholder32BitOffset, fixup);
9146 }
9147
9148 Address CodeGeneratorX86::LiteralFloatAddress(float v,
9149 HX86ComputeBaseMethodAddress* method_base,
9150 Register reg) {
9151 AssemblerFixup* fixup =
9152 new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddFloat(v));
9153 return Address(reg, kPlaceholder32BitOffset, fixup);
9154 }
9155
9156 Address CodeGeneratorX86::LiteralInt32Address(int32_t v,
9157 HX86ComputeBaseMethodAddress* method_base,
9158 Register reg) {
9159 AssemblerFixup* fixup =
9160 new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddInt32(v));
9161 return Address(reg, kPlaceholder32BitOffset, fixup);
9162 }
9163
9164 Address CodeGeneratorX86::LiteralInt64Address(int64_t v,
9165 HX86ComputeBaseMethodAddress* method_base,
9166 Register reg) {
9167 AssemblerFixup* fixup =
9168 new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddInt64(v));
9169 return Address(reg, kPlaceholder32BitOffset, fixup);
9170 }
9171
9172 void CodeGeneratorX86::Load32BitValue(Register dest, int32_t value) {
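  // Materialize zero with xor, which has a shorter encoding than mov with a zero immediate.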
9173 if (value == 0) {
9174 __ xorl(dest, dest);
9175 } else {
9176 __ movl(dest, Immediate(value));
9177 }
9178 }
9179
9180 void CodeGeneratorX86::Compare32BitValue(Register dest, int32_t value) {
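  // Compare against zero with test, which has a shorter encoding than cmp with a zero immediate.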
9181 if (value == 0) {
9182 __ testl(dest, dest);
9183 } else {
9184 __ cmpl(dest, Immediate(value));
9185 }
9186 }
9187
9188 void CodeGeneratorX86::GenerateIntCompare(Location lhs, Location rhs) {
9189 Register lhs_reg = lhs.AsRegister<Register>();
9190 GenerateIntCompare(lhs_reg, rhs);
9191 }
9192
9193 void CodeGeneratorX86::GenerateIntCompare(Register lhs, Location rhs) {
9194 if (rhs.IsConstant()) {
9195 int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
9196 Compare32BitValue(lhs, value);
9197 } else if (rhs.IsStackSlot()) {
9198 __ cmpl(lhs, Address(ESP, rhs.GetStackIndex()));
9199 } else {
9200 __ cmpl(lhs, rhs.AsRegister<Register>());
9201 }
9202 }
9203
9204 Address CodeGeneratorX86::ArrayAddress(Register obj,
9205 Location index,
9206 ScaleFactor scale,
9207 uint32_t data_offset) {
9208 return index.IsConstant()
9209 ? Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset)
9210 : Address(obj, index.AsRegister<Register>(), scale, data_offset);
9211 }
9212
9213 Address CodeGeneratorX86::LiteralCaseTable(HX86PackedSwitch* switch_instr,
9214 Register reg,
9215 Register value) {
9216 // Create a fixup to be used to create and address the jump table.
9217 JumpTableRIPFixup* table_fixup =
9218 new (GetGraph()->GetAllocator()) JumpTableRIPFixup(*this, switch_instr);
9219
9220 // We have to populate the jump tables.
9221 fixups_to_jump_tables_.push_back(table_fixup);
9222
9223 // We want a scaled address, as we are extracting the correct offset from the table.
9224 return Address(reg, value, TIMES_4, kPlaceholder32BitOffset, table_fixup);
9225 }
9226
9227 // TODO: target as memory.
9228 void CodeGeneratorX86::MoveFromReturnRegister(Location target, DataType::Type type) {
9229 if (!target.IsValid()) {
9230 DCHECK_EQ(type, DataType::Type::kVoid);
9231 return;
9232 }
9233
9234 DCHECK_NE(type, DataType::Type::kVoid);
9235
9236 Location return_loc = InvokeDexCallingConventionVisitorX86().GetReturnLocation(type);
9237 if (target.Equals(return_loc)) {
9238 return;
9239 }
9240
9241 // TODO: Consider pairs in the parallel move resolver, then this could be nicely merged
9242 // with the else branch.
9243 if (type == DataType::Type::kInt64) {
9244 HParallelMove parallel_move(GetGraph()->GetAllocator());
9245 parallel_move.AddMove(return_loc.ToLow(), target.ToLow(), DataType::Type::kInt32, nullptr);
9246 parallel_move.AddMove(return_loc.ToHigh(), target.ToHigh(), DataType::Type::kInt32, nullptr);
9247     GetMoveResolver()->EmitNativeCode(&parallel_move);
9248 } else {
9249 // Let the parallel move resolver take care of all of this.
9250 HParallelMove parallel_move(GetGraph()->GetAllocator());
9251 parallel_move.AddMove(return_loc, target, type, nullptr);
9252     GetMoveResolver()->EmitNativeCode(&parallel_move);
9253 }
9254 }
9255
9256 void CodeGeneratorX86::PatchJitRootUse(uint8_t* code,
9257 const uint8_t* roots_data,
9258 const PatchInfo<Label>& info,
9259 uint64_t index_in_table) const {
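  // Rewrite the 32-bit immediate emitted for the root load so that it holds the address of
  // the root's slot in the JIT root table.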
9260 uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
9261 uintptr_t address =
9262 reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
9263 using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t;
9264 reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
9265 dchecked_integral_cast<uint32_t>(address);
9266 }
9267
9268 void CodeGeneratorX86::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
9269 for (const PatchInfo<Label>& info : jit_string_patches_) {
9270 StringReference string_reference(info.target_dex_file, dex::StringIndex(info.offset_or_index));
9271 uint64_t index_in_table = GetJitStringRootIndex(string_reference);
9272 PatchJitRootUse(code, roots_data, info, index_in_table);
9273 }
9274
9275 for (const PatchInfo<Label>& info : jit_class_patches_) {
9276 TypeReference type_reference(info.target_dex_file, dex::TypeIndex(info.offset_or_index));
9277 uint64_t index_in_table = GetJitClassRootIndex(type_reference);
9278 PatchJitRootUse(code, roots_data, info, index_in_table);
9279 }
9280 }
9281
9282 void LocationsBuilderX86::VisitIntermediateAddress(
9283 [[maybe_unused]] HIntermediateAddress* instruction) {
9284 LOG(FATAL) << "Unreachable";
9285 }
9286
9287 void InstructionCodeGeneratorX86::VisitIntermediateAddress(
9288 [[maybe_unused]] HIntermediateAddress* instruction) {
9289 LOG(FATAL) << "Unreachable";
9290 }
9291
9292 bool LocationsBuilderX86::CpuHasAvxFeatureFlag() {
9293 return codegen_->GetInstructionSetFeatures().HasAVX();
9294 }
9295 bool LocationsBuilderX86::CpuHasAvx2FeatureFlag() {
9296 return codegen_->GetInstructionSetFeatures().HasAVX2();
9297 }
9298 bool InstructionCodeGeneratorX86::CpuHasAvxFeatureFlag() {
9299 return codegen_->GetInstructionSetFeatures().HasAVX();
9300 }
9301 bool InstructionCodeGeneratorX86::CpuHasAvx2FeatureFlag() {
9302 return codegen_->GetInstructionSetFeatures().HasAVX2();
9303 }
9304
9305 void LocationsBuilderX86::VisitBitwiseNegatedRight(
9306 [[maybe_unused]] HBitwiseNegatedRight* instruction) {
9307 LOG(FATAL) << "Unimplemented";
9308 }
9309
9310 void InstructionCodeGeneratorX86::VisitBitwiseNegatedRight(
9311 [[maybe_unused]] HBitwiseNegatedRight* instruction) {
9312 LOG(FATAL) << "Unimplemented";
9313 }
9314
9315 #undef __
9316
9317 } // namespace x86
9318 } // namespace art
9319