1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "code_generator_x86.h"
18 
19 #include "arch/x86/jni_frame_x86.h"
20 #include "art_method-inl.h"
21 #include "class_table.h"
22 #include "code_generator_utils.h"
23 #include "compiled_method.h"
24 #include "entrypoints/quick/quick_entrypoints.h"
25 #include "entrypoints/quick/quick_entrypoints_enum.h"
26 #include "gc/accounting/card_table.h"
27 #include "gc/space/image_space.h"
28 #include "heap_poisoning.h"
29 #include "interpreter/mterp/nterp.h"
30 #include "intrinsics.h"
31 #include "intrinsics_x86.h"
32 #include "jit/profiling_info.h"
33 #include "linker/linker_patch.h"
34 #include "lock_word.h"
35 #include "mirror/array-inl.h"
36 #include "mirror/class-inl.h"
37 #include "mirror/var_handle.h"
38 #include "scoped_thread_state_change-inl.h"
39 #include "thread.h"
40 #include "utils/assembler.h"
41 #include "utils/stack_checks.h"
42 #include "utils/x86/assembler_x86.h"
43 #include "utils/x86/managed_register_x86.h"
44 
45 namespace art {
46 
47 template<class MirrorType>
48 class GcRoot;
49 
50 namespace x86 {
51 
52 static constexpr int kCurrentMethodStackOffset = 0;
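// On entry to a managed method the callee's ArtMethod* is passed in EAX
// (see InvokeDexCallingConventionVisitorX86::GetMethodLocation() below).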
53 static constexpr Register kMethodRegisterArgument = EAX;
54 static constexpr Register kCoreCalleeSaves[] = { EBP, ESI, EDI };
55 
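// C2 flag (bit 10) of the x87 FPU status word; FPREM sets it while the partial remainder is
// still incomplete, so code lowering floating-point remainder can loop until the bit clears.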
56 static constexpr int kC2ConditionMask = 0x400;
57 
58 static constexpr int kFakeReturnRegister = Register(8);
59 
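// Canonical quiet-NaN bit patterns for double and float results.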
60 static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000);
61 static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000);
62 
63 static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
64   InvokeRuntimeCallingConvention calling_convention;
65   RegisterSet caller_saves = RegisterSet::Empty();
66   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
67   // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
68   // that the kPrimNot result register is the same as the first argument register.
69   return caller_saves;
70 }
71 
72 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
73 #define __ down_cast<X86Assembler*>(codegen->GetAssembler())->  // NOLINT
74 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, x).Int32Value()
75 
76 class NullCheckSlowPathX86 : public SlowPathCode {
77  public:
78   explicit NullCheckSlowPathX86(HNullCheck* instruction) : SlowPathCode(instruction) {}
79 
80   void EmitNativeCode(CodeGenerator* codegen) override {
81     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
82     __ Bind(GetEntryLabel());
83     if (instruction_->CanThrowIntoCatchBlock()) {
84       // Live registers will be restored in the catch block if caught.
85       SaveLiveRegisters(codegen, instruction_->GetLocations());
86     }
87     x86_codegen->InvokeRuntime(kQuickThrowNullPointer,
88                                instruction_,
89                                instruction_->GetDexPc(),
90                                this);
91     CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
92   }
93 
94   bool IsFatal() const override { return true; }
95 
96   const char* GetDescription() const override { return "NullCheckSlowPathX86"; }
97 
98  private:
99   DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86);
100 };
101 
102 class DivZeroCheckSlowPathX86 : public SlowPathCode {
103  public:
104   explicit DivZeroCheckSlowPathX86(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}
105 
106   void EmitNativeCode(CodeGenerator* codegen) override {
107     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
108     __ Bind(GetEntryLabel());
109     x86_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
110     CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
111   }
112 
113   bool IsFatal() const override { return true; }
114 
115   const char* GetDescription() const override { return "DivZeroCheckSlowPathX86"; }
116 
117  private:
118   DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86);
119 };
120 
121 class DivRemMinusOneSlowPathX86 : public SlowPathCode {
122  public:
123   DivRemMinusOneSlowPathX86(HInstruction* instruction, Register reg, bool is_div)
124       : SlowPathCode(instruction), reg_(reg), is_div_(is_div) {}
125 
126   void EmitNativeCode(CodeGenerator* codegen) override {
127     __ Bind(GetEntryLabel());
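    // Taken when the divisor is -1: idiv would trap on kMinInt / -1, so the result is
    // computed directly.  The quotient is the negated dividend (kMinInt negates to itself,
    // matching Java semantics) and the remainder is always 0.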
128     if (is_div_) {
129       __ negl(reg_);
130     } else {
131       __ movl(reg_, Immediate(0));
132     }
133     __ jmp(GetExitLabel());
134   }
135 
136   const char* GetDescription() const override { return "DivRemMinusOneSlowPathX86"; }
137 
138  private:
139   Register reg_;
140   bool is_div_;
141   DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86);
142 };
143 
144 class BoundsCheckSlowPathX86 : public SlowPathCode {
145  public:
146   explicit BoundsCheckSlowPathX86(HBoundsCheck* instruction) : SlowPathCode(instruction) {}
147 
148   void EmitNativeCode(CodeGenerator* codegen) override {
149     LocationSummary* locations = instruction_->GetLocations();
150     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
151     __ Bind(GetEntryLabel());
152     // We're moving two locations to locations that could overlap, so we need a parallel
153     // move resolver.
154     if (instruction_->CanThrowIntoCatchBlock()) {
155       // Live registers will be restored in the catch block if caught.
156       SaveLiveRegisters(codegen, instruction_->GetLocations());
157     }
158 
159     // Are we using an array length from memory?
160     HInstruction* array_length = instruction_->InputAt(1);
161     Location length_loc = locations->InAt(1);
162     InvokeRuntimeCallingConvention calling_convention;
163     if (array_length->IsArrayLength() && array_length->IsEmittedAtUseSite()) {
164       // Load the array length into our temporary.
165       HArrayLength* length = array_length->AsArrayLength();
166       uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(length);
167       Location array_loc = array_length->GetLocations()->InAt(0);
168       Address array_len(array_loc.AsRegister<Register>(), len_offset);
169       length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
170       // Check for conflicts with index.
171       if (length_loc.Equals(locations->InAt(0))) {
172         // We know we aren't using parameter 2.
173         length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2));
174       }
175       __ movl(length_loc.AsRegister<Register>(), array_len);
176       if (mirror::kUseStringCompression && length->IsStringLength()) {
177         __ shrl(length_loc.AsRegister<Register>(), Immediate(1));
178       }
179     }
180     x86_codegen->EmitParallelMoves(
181         locations->InAt(0),
182         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
183         DataType::Type::kInt32,
184         length_loc,
185         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
186         DataType::Type::kInt32);
187     QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
188         ? kQuickThrowStringBounds
189         : kQuickThrowArrayBounds;
190     x86_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
191     CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
192     CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
193   }
194 
195   bool IsFatal() const override { return true; }
196 
197   const char* GetDescription() const override { return "BoundsCheckSlowPathX86"; }
198 
199  private:
200   DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86);
201 };
202 
203 class SuspendCheckSlowPathX86 : public SlowPathCode {
204  public:
205   SuspendCheckSlowPathX86(HSuspendCheck* instruction, HBasicBlock* successor)
206       : SlowPathCode(instruction), successor_(successor) {}
207 
208   void EmitNativeCode(CodeGenerator* codegen) override {
209     LocationSummary* locations = instruction_->GetLocations();
210     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
211     __ Bind(GetEntryLabel());
212     SaveLiveRegisters(codegen, locations);  // Only saves full width XMM for SIMD.
213     x86_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
214     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
215     RestoreLiveRegisters(codegen, locations);  // Only restores full width XMM for SIMD.
216     if (successor_ == nullptr) {
217       __ jmp(GetReturnLabel());
218     } else {
219       __ jmp(x86_codegen->GetLabelOf(successor_));
220     }
221   }
222 
223   Label* GetReturnLabel() {
224     DCHECK(successor_ == nullptr);
225     return &return_label_;
226   }
227 
228   HBasicBlock* GetSuccessor() const {
229     return successor_;
230   }
231 
232   const char* GetDescription() const override { return "SuspendCheckSlowPathX86"; }
233 
234  private:
235   HBasicBlock* const successor_;
236   Label return_label_;
237 
238   DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86);
239 };
240 
241 class LoadStringSlowPathX86 : public SlowPathCode {
242  public:
243   explicit LoadStringSlowPathX86(HLoadString* instruction): SlowPathCode(instruction) {}
244 
245   void EmitNativeCode(CodeGenerator* codegen) override {
246     LocationSummary* locations = instruction_->GetLocations();
247     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
248 
249     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
250     __ Bind(GetEntryLabel());
251     SaveLiveRegisters(codegen, locations);
252 
253     InvokeRuntimeCallingConvention calling_convention;
254     const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
255     __ movl(calling_convention.GetRegisterAt(0), Immediate(string_index.index_));
256     x86_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
257     CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
258     x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX));
259     RestoreLiveRegisters(codegen, locations);
260 
261     __ jmp(GetExitLabel());
262   }
263 
264   const char* GetDescription() const override { return "LoadStringSlowPathX86"; }
265 
266  private:
267   DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86);
268 };
269 
270 class LoadClassSlowPathX86 : public SlowPathCode {
271  public:
272   LoadClassSlowPathX86(HLoadClass* cls, HInstruction* at)
273       : SlowPathCode(at), cls_(cls) {
274     DCHECK(at->IsLoadClass() || at->IsClinitCheck());
275     DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
276   }
277 
278   void EmitNativeCode(CodeGenerator* codegen) override {
279     LocationSummary* locations = instruction_->GetLocations();
280     Location out = locations->Out();
281     const uint32_t dex_pc = instruction_->GetDexPc();
282     bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
283     bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();
284 
285     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
286     __ Bind(GetEntryLabel());
287     SaveLiveRegisters(codegen, locations);
288 
289     InvokeRuntimeCallingConvention calling_convention;
290     if (must_resolve_type) {
291       DCHECK(IsSameDexFile(cls_->GetDexFile(), x86_codegen->GetGraph()->GetDexFile()));
292       dex::TypeIndex type_index = cls_->GetTypeIndex();
293       __ movl(calling_convention.GetRegisterAt(0), Immediate(type_index.index_));
294       if (cls_->NeedsAccessCheck()) {
295         CheckEntrypointTypes<kQuickResolveTypeAndVerifyAccess, void*, uint32_t>();
296         x86_codegen->InvokeRuntime(kQuickResolveTypeAndVerifyAccess, instruction_, dex_pc, this);
297       } else {
298         CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
299         x86_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
300       }
301       // If we also must_do_clinit, the resolved type is now in the correct register.
302     } else {
303       DCHECK(must_do_clinit);
304       Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
305       x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), source);
306     }
307     if (must_do_clinit) {
308       x86_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
309       CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
310     }
311 
312     // Move the class to the desired location.
313     if (out.IsValid()) {
314       DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
315       x86_codegen->Move32(out, Location::RegisterLocation(EAX));
316     }
317     RestoreLiveRegisters(codegen, locations);
318     __ jmp(GetExitLabel());
319   }
320 
321   const char* GetDescription() const override { return "LoadClassSlowPathX86"; }
322 
323  private:
324   // The class this slow path will load.
325   HLoadClass* const cls_;
326 
327   DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86);
328 };
329 
330 class TypeCheckSlowPathX86 : public SlowPathCode {
331  public:
332   TypeCheckSlowPathX86(HInstruction* instruction, bool is_fatal)
333       : SlowPathCode(instruction), is_fatal_(is_fatal) {}
334 
335   void EmitNativeCode(CodeGenerator* codegen) override {
336     LocationSummary* locations = instruction_->GetLocations();
337     DCHECK(instruction_->IsCheckCast()
338            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
339 
340     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
341     __ Bind(GetEntryLabel());
342 
343     if (kPoisonHeapReferences &&
344         instruction_->IsCheckCast() &&
345         instruction_->AsCheckCast()->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) {
346       // First, unpoison the `cls` reference that was poisoned for direct memory comparison.
347       __ UnpoisonHeapReference(locations->InAt(1).AsRegister<Register>());
348     }
349 
350     if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
351       SaveLiveRegisters(codegen, locations);
352     }
353 
354     // We're moving two locations to locations that could overlap, so we need a parallel
355     // move resolver.
356     InvokeRuntimeCallingConvention calling_convention;
357     x86_codegen->EmitParallelMoves(locations->InAt(0),
358                                    Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
359                                    DataType::Type::kReference,
360                                    locations->InAt(1),
361                                    Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
362                                    DataType::Type::kReference);
363     if (instruction_->IsInstanceOf()) {
364       x86_codegen->InvokeRuntime(kQuickInstanceofNonTrivial,
365                                  instruction_,
366                                  instruction_->GetDexPc(),
367                                  this);
368       CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
369     } else {
370       DCHECK(instruction_->IsCheckCast());
371       x86_codegen->InvokeRuntime(kQuickCheckInstanceOf,
372                                  instruction_,
373                                  instruction_->GetDexPc(),
374                                  this);
375       CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
376     }
377 
378     if (!is_fatal_) {
379       if (instruction_->IsInstanceOf()) {
380         x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX));
381       }
382       RestoreLiveRegisters(codegen, locations);
383 
384       __ jmp(GetExitLabel());
385     }
386   }
387 
388   const char* GetDescription() const override { return "TypeCheckSlowPathX86"; }
389   bool IsFatal() const override { return is_fatal_; }
390 
391  private:
392   const bool is_fatal_;
393 
394   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86);
395 };
396 
397 class DeoptimizationSlowPathX86 : public SlowPathCode {
398  public:
399   explicit DeoptimizationSlowPathX86(HDeoptimize* instruction)
400     : SlowPathCode(instruction) {}
401 
402   void EmitNativeCode(CodeGenerator* codegen) override {
403     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
404     __ Bind(GetEntryLabel());
405     LocationSummary* locations = instruction_->GetLocations();
406     SaveLiveRegisters(codegen, locations);
407     InvokeRuntimeCallingConvention calling_convention;
408     x86_codegen->Load32BitValue(
409         calling_convention.GetRegisterAt(0),
410         static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
411     x86_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
412     CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
413   }
414 
415   const char* GetDescription() const override { return "DeoptimizationSlowPathX86"; }
416 
417  private:
418   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86);
419 };
420 
421 class ArraySetSlowPathX86 : public SlowPathCode {
422  public:
423   explicit ArraySetSlowPathX86(HInstruction* instruction) : SlowPathCode(instruction) {}
424 
425   void EmitNativeCode(CodeGenerator* codegen) override {
426     LocationSummary* locations = instruction_->GetLocations();
427     __ Bind(GetEntryLabel());
428     SaveLiveRegisters(codegen, locations);
429 
430     InvokeRuntimeCallingConvention calling_convention;
431     HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
432     parallel_move.AddMove(
433         locations->InAt(0),
434         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
435         DataType::Type::kReference,
436         nullptr);
437     parallel_move.AddMove(
438         locations->InAt(1),
439         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
440         DataType::Type::kInt32,
441         nullptr);
442     parallel_move.AddMove(
443         locations->InAt(2),
444         Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
445         DataType::Type::kReference,
446         nullptr);
447     codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
448 
449     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
450     x86_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
451     CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
452     RestoreLiveRegisters(codegen, locations);
453     __ jmp(GetExitLabel());
454   }
455 
456   const char* GetDescription() const override { return "ArraySetSlowPathX86"; }
457 
458  private:
459   DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86);
460 };
461 
462 // Slow path marking an object reference `ref` during a read
463 // barrier. The field `obj.field` in the object `obj` holding this
464 // reference does not get updated by this slow path after marking (see
465 // ReadBarrierMarkAndUpdateFieldSlowPathX86 below for that).
466 //
467 // This means that after the execution of this slow path, `ref` will
468 // always be up-to-date, but `obj.field` may not; i.e., after the
469 // flip, `ref` will be a to-space reference, but `obj.field` will
470 // probably still be a from-space reference (unless it gets updated by
471 // another thread, or if another thread installed another object
472 // reference (different from `ref`) in `obj.field`).
473 class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
474  public:
475   ReadBarrierMarkSlowPathX86(HInstruction* instruction,
476                              Location ref,
477                              bool unpoison_ref_before_marking)
478       : SlowPathCode(instruction),
479         ref_(ref),
480         unpoison_ref_before_marking_(unpoison_ref_before_marking) {
481     DCHECK(kEmitCompilerReadBarrier);
482   }
483 
484   const char* GetDescription() const override { return "ReadBarrierMarkSlowPathX86"; }
485 
486   void EmitNativeCode(CodeGenerator* codegen) override {
487     LocationSummary* locations = instruction_->GetLocations();
488     Register ref_reg = ref_.AsRegister<Register>();
489     DCHECK(locations->CanCall());
490     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
491     DCHECK(instruction_->IsInstanceFieldGet() ||
492            instruction_->IsPredicatedInstanceFieldGet() ||
493            instruction_->IsStaticFieldGet() ||
494            instruction_->IsArrayGet() ||
495            instruction_->IsArraySet() ||
496            instruction_->IsLoadClass() ||
497            instruction_->IsLoadString() ||
498            instruction_->IsInstanceOf() ||
499            instruction_->IsCheckCast() ||
500            (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
501         << "Unexpected instruction in read barrier marking slow path: "
502         << instruction_->DebugName();
503 
504     __ Bind(GetEntryLabel());
505     if (unpoison_ref_before_marking_) {
506       // Object* ref = ref_addr->AsMirrorPtr()
507       __ MaybeUnpoisonHeapReference(ref_reg);
508     }
509     // No need to save live registers; it's taken care of by the
510     // entrypoint. Also, there is no need to update the stack mask,
511     // as this runtime call will not trigger a garbage collection.
512     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
513     DCHECK_NE(ref_reg, ESP);
514     DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
515     // "Compact" slow path, saving two moves.
516     //
517     // Instead of using the standard runtime calling convention (input
518     // and output in EAX):
519     //
520     //   EAX <- ref
521     //   EAX <- ReadBarrierMark(EAX)
522     //   ref <- EAX
523     //
524     // we just use rX (the register containing `ref`) as input and output
525     // of a dedicated entrypoint:
526     //
527     //   rX <- ReadBarrierMarkRegX(rX)
528     //
529     int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg);
530     // This runtime call does not require a stack map.
531     x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
532     __ jmp(GetExitLabel());
533   }
534 
535  private:
536   // The location (register) of the marked object reference.
537   const Location ref_;
538   // Should the reference in `ref_` be unpoisoned prior to marking it?
539   const bool unpoison_ref_before_marking_;
540 
541   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86);
542 };
543 
544 // Slow path marking an object reference `ref` during a read barrier,
545 // and if needed, atomically updating the field `obj.field` in the
546 // object `obj` holding this reference after marking (contrary to
547 // ReadBarrierMarkSlowPathX86 above, which never tries to update
548 // `obj.field`).
549 //
550 // This means that after the execution of this slow path, both `ref`
551 // and `obj.field` will be up-to-date; i.e., after the flip, both will
552 // hold the same to-space reference (unless another thread installed
553 // another object reference (different from `ref`) in `obj.field`).
554 class ReadBarrierMarkAndUpdateFieldSlowPathX86 : public SlowPathCode {
555  public:
556   ReadBarrierMarkAndUpdateFieldSlowPathX86(HInstruction* instruction,
557                                            Location ref,
558                                            Register obj,
559                                            const Address& field_addr,
560                                            bool unpoison_ref_before_marking,
561                                            Register temp)
562       : SlowPathCode(instruction),
563         ref_(ref),
564         obj_(obj),
565         field_addr_(field_addr),
566         unpoison_ref_before_marking_(unpoison_ref_before_marking),
567         temp_(temp) {
568     DCHECK(kEmitCompilerReadBarrier);
569   }
570 
571   const char* GetDescription() const override { return "ReadBarrierMarkAndUpdateFieldSlowPathX86"; }
572 
573   void EmitNativeCode(CodeGenerator* codegen) override {
574     LocationSummary* locations = instruction_->GetLocations();
575     Register ref_reg = ref_.AsRegister<Register>();
576     DCHECK(locations->CanCall());
577     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
578     // This slow path is only used by the UnsafeCASObject and VarHandle update intrinsics.
579     DCHECK((instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
580         << "Unexpected instruction in read barrier marking and field updating slow path: "
581         << instruction_->DebugName();
582     DCHECK(instruction_->GetLocations()->Intrinsified());
583     Intrinsics intrinsic = instruction_->AsInvoke()->GetIntrinsic();
584     static constexpr auto kVarHandleCAS = mirror::VarHandle::AccessModeTemplate::kCompareAndSet;
585     static constexpr auto kVarHandleGetAndSet =
586         mirror::VarHandle::AccessModeTemplate::kGetAndUpdate;
587     static constexpr auto kVarHandleCAX =
588         mirror::VarHandle::AccessModeTemplate::kCompareAndExchange;
589     DCHECK(intrinsic == Intrinsics::kUnsafeCASObject ||
590            mirror::VarHandle::GetAccessModeTemplateByIntrinsic(intrinsic) == kVarHandleCAS ||
591            mirror::VarHandle::GetAccessModeTemplateByIntrinsic(intrinsic) == kVarHandleGetAndSet ||
592            mirror::VarHandle::GetAccessModeTemplateByIntrinsic(intrinsic) == kVarHandleCAX);
593 
594     __ Bind(GetEntryLabel());
595     if (unpoison_ref_before_marking_) {
596       // Object* ref = ref_addr->AsMirrorPtr()
597       __ MaybeUnpoisonHeapReference(ref_reg);
598     }
599 
600     // Save the old (unpoisoned) reference.
601     __ movl(temp_, ref_reg);
602 
603     // No need to save live registers; it's taken care of by the
604     // entrypoint. Also, there is no need to update the stack mask,
605     // as this runtime call will not trigger a garbage collection.
606     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
607     DCHECK_NE(ref_reg, ESP);
608     DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
609     // "Compact" slow path, saving two moves.
610     //
611     // Instead of using the standard runtime calling convention (input
612     // and output in EAX):
613     //
614     //   EAX <- ref
615     //   EAX <- ReadBarrierMark(EAX)
616     //   ref <- EAX
617     //
618     // we just use rX (the register containing `ref`) as input and output
619     // of a dedicated entrypoint:
620     //
621     //   rX <- ReadBarrierMarkRegX(rX)
622     //
623     int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg);
624     // This runtime call does not require a stack map.
625     x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
626 
627     // If the new reference is different from the old reference,
628     // update the field in the holder (`*field_addr`).
629     //
630     // Note that this field could also hold a different object, if
631     // another thread had concurrently changed it. In that case, the
632     // LOCK CMPXCHGL instruction in the compare-and-set (CAS)
633     // operation below would abort the CAS, leaving the field as-is.
634     NearLabel done;
635     __ cmpl(temp_, ref_reg);
636     __ j(kEqual, &done);
637 
638     // Update the holder's field atomically.  This may fail if the
639     // mutator updates it before us, but that is OK.  This is achieved
640     // using a strong compare-and-set (CAS) operation with relaxed
641     // memory synchronization ordering, where the expected value is
642     // the old reference and the desired value is the new reference.
643     // This operation is implemented with a 32-bit LOCK CMPXCHG
644     // instruction, which requires the expected value (the old
645     // reference) to be in EAX.  Save EAX beforehand, and move the
646     // expected value (stored in `temp_`) into EAX.
647     __ pushl(EAX);
648     __ movl(EAX, temp_);
649 
650     // Convenience aliases.
651     Register base = obj_;
652     Register expected = EAX;
653     Register value = ref_reg;
654 
655     bool base_equals_value = (base == value);
656     if (kPoisonHeapReferences) {
657       if (base_equals_value) {
658         // If `base` and `value` are the same register location, move
659         // `value` to a temporary register.  This way, poisoning
660         // `value` won't invalidate `base`.
661         value = temp_;
662         __ movl(value, base);
663       }
664 
665       // Check that the register allocator did not assign the location
666       // of `expected` (EAX) to `value` nor to `base`, so that heap
667       // poisoning (when enabled) works as intended below.
668       // - If `value` were equal to `expected`, both references would
669       //   be poisoned twice, meaning they would not be poisoned at
670       //   all, as heap poisoning uses address negation.
671       // - If `base` were equal to `expected`, poisoning `expected`
672       //   would invalidate `base`.
673       DCHECK_NE(value, expected);
674       DCHECK_NE(base, expected);
675 
676       __ PoisonHeapReference(expected);
677       __ PoisonHeapReference(value);
678     }
679 
680     __ LockCmpxchgl(field_addr_, value);
681 
682     // If heap poisoning is enabled, we need to unpoison the values
683     // that were poisoned earlier.
684     if (kPoisonHeapReferences) {
685       if (base_equals_value) {
686         // `value` has been moved to a temporary register, no need
687         // to unpoison it.
688       } else {
689         __ UnpoisonHeapReference(value);
690       }
691       // No need to unpoison `expected` (EAX), as it will be overwritten below.
692     }
693 
694     // Restore EAX.
695     __ popl(EAX);
696 
697     __ Bind(&done);
698     __ jmp(GetExitLabel());
699   }
700 
701  private:
702   // The location (register) of the marked object reference.
703   const Location ref_;
704   // The register containing the object holding the marked object reference field.
705   const Register obj_;
706   // The address of the marked reference field.  The base of this address must be `obj_`.
707   const Address field_addr_;
708 
709   // Should the reference in `ref_` be unpoisoned prior to marking it?
710   const bool unpoison_ref_before_marking_;
711 
712   const Register temp_;
713 
714   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86);
715 };
716 
717 // Slow path generating a read barrier for a heap reference.
718 class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode {
719  public:
720   ReadBarrierForHeapReferenceSlowPathX86(HInstruction* instruction,
721                                          Location out,
722                                          Location ref,
723                                          Location obj,
724                                          uint32_t offset,
725                                          Location index)
726       : SlowPathCode(instruction),
727         out_(out),
728         ref_(ref),
729         obj_(obj),
730         offset_(offset),
731         index_(index) {
732     DCHECK(kEmitCompilerReadBarrier);
733     // If `obj` is equal to `out` or `ref`, it means the initial object
734     // has been overwritten by (or after) the heap object reference load
735     // to be instrumented, e.g.:
736     //
737     //   __ movl(out, Address(out, offset));
738     //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
739     //
740     // In that case, we have lost the information about the original
741     // object, and the emitted read barrier cannot work properly.
742     DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
743     DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
744   }
745 
746   void EmitNativeCode(CodeGenerator* codegen) override {
747     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
748     LocationSummary* locations = instruction_->GetLocations();
749     Register reg_out = out_.AsRegister<Register>();
750     DCHECK(locations->CanCall());
751     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
752     DCHECK(instruction_->IsInstanceFieldGet() ||
753            instruction_->IsPredicatedInstanceFieldGet() ||
754            instruction_->IsStaticFieldGet() ||
755            instruction_->IsArrayGet() ||
756            instruction_->IsInstanceOf() ||
757            instruction_->IsCheckCast() ||
758            (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
759         << "Unexpected instruction in read barrier for heap reference slow path: "
760         << instruction_->DebugName();
761 
762     __ Bind(GetEntryLabel());
763     SaveLiveRegisters(codegen, locations);
764 
765     // We may have to change the index's value, but as `index_` is a
766     // constant member (like other "inputs" of this slow path), we
767     // introduce a local copy of it, `index`.
768     Location index = index_;
769     if (index_.IsValid()) {
770       // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
771       if (instruction_->IsArrayGet()) {
772         // Compute the actual memory offset and store it in `index`.
773         Register index_reg = index_.AsRegister<Register>();
774         DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
775         if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
776           // We are about to change the value of `index_reg` (see the
777           // calls to art::x86::X86Assembler::shll and
778           // art::x86::X86Assembler::AddImmediate below), but it has
779           // not been saved by the previous call to
780           // art::SlowPathCode::SaveLiveRegisters, as it is a
781           // callee-save register --
782           // art::SlowPathCode::SaveLiveRegisters does not consider
783           // callee-save registers, as it has been designed with the
784           // assumption that callee-save registers are supposed to be
785           // handled by the called function.  So, as a callee-save
786           // register, `index_reg` _would_ eventually be saved onto
787           // the stack, but it would be too late: we would have
788           // changed its value earlier.  Therefore, we manually save
789           // it here into another freely available register,
790           // `free_reg`, chosen of course among the caller-save
791           // registers (as a callee-save `free_reg` register would
792           // exhibit the same problem).
793           //
794           // Note we could have requested a temporary register from
795           // the register allocator instead; but we prefer not to, as
796           // this is a slow path, and we know we can find a
797           // caller-save register that is available.
798           Register free_reg = FindAvailableCallerSaveRegister(codegen);
799           __ movl(free_reg, index_reg);
800           index_reg = free_reg;
801           index = Location::RegisterLocation(index_reg);
802         } else {
803           // The initial register stored in `index_` has already been
804           // saved in the call to art::SlowPathCode::SaveLiveRegisters
805           // (as it is not a callee-save register), so we can freely
806           // use it.
807         }
808         // Shifting the index value contained in `index_reg` by the scale
809         // factor (2) cannot overflow in practice, as the runtime is
810         // unable to allocate object arrays with a size larger than
811         // 2^26 - 1 (that is, 2^28 - 4 bytes).
812         __ shll(index_reg, Immediate(TIMES_4));
813         static_assert(
814             sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
815             "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
816         __ AddImmediate(index_reg, Immediate(offset_));
817       } else {
818         // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
819         // intrinsics, `index_` is not shifted by a scale factor of 2
820         // (as in the case of ArrayGet), as it is actually an offset
821         // to an object field within an object.
822         DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
823         DCHECK(instruction_->GetLocations()->Intrinsified());
824         DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
825                (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
826             << instruction_->AsInvoke()->GetIntrinsic();
827         DCHECK_EQ(offset_, 0U);
828         DCHECK(index_.IsRegisterPair());
829         // UnsafeGet's offset location is a register pair; the low
830         // part contains the correct offset.
831         index = index_.ToLow();
832       }
833     }
834 
835     // We're moving two or three locations to locations that could
836     // overlap, so we need a parallel move resolver.
837     InvokeRuntimeCallingConvention calling_convention;
838     HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
839     parallel_move.AddMove(ref_,
840                           Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
841                           DataType::Type::kReference,
842                           nullptr);
843     parallel_move.AddMove(obj_,
844                           Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
845                           DataType::Type::kReference,
846                           nullptr);
847     if (index.IsValid()) {
848       parallel_move.AddMove(index,
849                             Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
850                             DataType::Type::kInt32,
851                             nullptr);
852       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
853     } else {
854       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
855       __ movl(calling_convention.GetRegisterAt(2), Immediate(offset_));
856     }
857     x86_codegen->InvokeRuntime(kQuickReadBarrierSlow, instruction_, instruction_->GetDexPc(), this);
858     CheckEntrypointTypes<
859         kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
860     x86_codegen->Move32(out_, Location::RegisterLocation(EAX));
861 
862     RestoreLiveRegisters(codegen, locations);
863     __ jmp(GetExitLabel());
864   }
865 
866   const char* GetDescription() const override { return "ReadBarrierForHeapReferenceSlowPathX86"; }
867 
868  private:
869   Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
870     size_t ref = static_cast<int>(ref_.AsRegister<Register>());
871     size_t obj = static_cast<int>(obj_.AsRegister<Register>());
872     for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
873       if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
874         return static_cast<Register>(i);
875       }
876     }
877     // We shall never fail to find a free caller-save register, as
878     // there are more than two core caller-save registers on x86
879     // (meaning it is possible to find one which is different from
880     // `ref` and `obj`).
881     DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
882     LOG(FATAL) << "Could not find a free caller-save register";
883     UNREACHABLE();
884   }
885 
886   const Location out_;
887   const Location ref_;
888   const Location obj_;
889   const uint32_t offset_;
890   // An additional location containing an index to an array.
891   // Only used for HArrayGet and the UnsafeGetObject &
892   // UnsafeGetObjectVolatile intrinsics.
893   const Location index_;
894 
895   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86);
896 };
897 
898 // Slow path generating a read barrier for a GC root.
899 class ReadBarrierForRootSlowPathX86 : public SlowPathCode {
900  public:
901   ReadBarrierForRootSlowPathX86(HInstruction* instruction, Location out, Location root)
902       : SlowPathCode(instruction), out_(out), root_(root) {
903     DCHECK(kEmitCompilerReadBarrier);
904   }
905 
906   void EmitNativeCode(CodeGenerator* codegen) override {
907     LocationSummary* locations = instruction_->GetLocations();
908     Register reg_out = out_.AsRegister<Register>();
909     DCHECK(locations->CanCall());
910     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
911     DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
912         << "Unexpected instruction in read barrier for GC root slow path: "
913         << instruction_->DebugName();
914 
915     __ Bind(GetEntryLabel());
916     SaveLiveRegisters(codegen, locations);
917 
918     InvokeRuntimeCallingConvention calling_convention;
919     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
920     x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
921     x86_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
922                                instruction_,
923                                instruction_->GetDexPc(),
924                                this);
925     CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
926     x86_codegen->Move32(out_, Location::RegisterLocation(EAX));
927 
928     RestoreLiveRegisters(codegen, locations);
929     __ jmp(GetExitLabel());
930   }
931 
932   const char* GetDescription() const override { return "ReadBarrierForRootSlowPathX86"; }
933 
934  private:
935   const Location out_;
936   const Location root_;
937 
938   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86);
939 };
940 
941 #undef __
942 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
943 #define __ down_cast<X86Assembler*>(GetAssembler())->  // NOLINT
944 
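// Maps a signed HIR condition to the corresponding x86 condition code.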
945 inline Condition X86Condition(IfCondition cond) {
946   switch (cond) {
947     case kCondEQ: return kEqual;
948     case kCondNE: return kNotEqual;
949     case kCondLT: return kLess;
950     case kCondLE: return kLessEqual;
951     case kCondGT: return kGreater;
952     case kCondGE: return kGreaterEqual;
953     case kCondB:  return kBelow;
954     case kCondBE: return kBelowEqual;
955     case kCondA:  return kAbove;
956     case kCondAE: return kAboveEqual;
957   }
958   LOG(FATAL) << "Unreachable";
959   UNREACHABLE();
960 }
961 
962 // Maps signed condition to unsigned condition and FP condition to x86 name.
963 inline Condition X86UnsignedOrFPCondition(IfCondition cond) {
964   switch (cond) {
965     case kCondEQ: return kEqual;
966     case kCondNE: return kNotEqual;
967     // Signed to unsigned, and FP to x86 name.
968     case kCondLT: return kBelow;
969     case kCondLE: return kBelowEqual;
970     case kCondGT: return kAbove;
971     case kCondGE: return kAboveEqual;
972     // Unsigned remain unchanged.
973     case kCondB:  return kBelow;
974     case kCondBE: return kBelowEqual;
975     case kCondA:  return kAbove;
976     case kCondAE: return kAboveEqual;
977   }
978   LOG(FATAL) << "Unreachable";
979   UNREACHABLE();
980 }
981 
982 void CodeGeneratorX86::DumpCoreRegister(std::ostream& stream, int reg) const {
983   stream << Register(reg);
984 }
985 
986 void CodeGeneratorX86::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
987   stream << XmmRegister(reg);
988 }
989 
990 const X86InstructionSetFeatures& CodeGeneratorX86::GetInstructionSetFeatures() const {
991   return *GetCompilerOptions().GetInstructionSetFeatures()->AsX86InstructionSetFeatures();
992 }
993 
994 size_t CodeGeneratorX86::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
995   __ movl(Address(ESP, stack_index), static_cast<Register>(reg_id));
996   return kX86WordSize;
997 }
998 
999 size_t CodeGeneratorX86::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
1000   __ movl(static_cast<Register>(reg_id), Address(ESP, stack_index));
1001   return kX86WordSize;
1002 }
1003 
1004 size_t CodeGeneratorX86::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
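  // With SIMD the full 128-bit XMM register may be live, so spill all of it (movups);
  // otherwise a 64-bit spill (movsd) is sufficient.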
1005   if (GetGraph()->HasSIMD()) {
1006     __ movups(Address(ESP, stack_index), XmmRegister(reg_id));
1007   } else {
1008     __ movsd(Address(ESP, stack_index), XmmRegister(reg_id));
1009   }
1010   return GetSlowPathFPWidth();
1011 }
1012 
1013 size_t CodeGeneratorX86::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1014   if (GetGraph()->HasSIMD()) {
1015     __ movups(XmmRegister(reg_id), Address(ESP, stack_index));
1016   } else {
1017     __ movsd(XmmRegister(reg_id), Address(ESP, stack_index));
1018   }
1019   return GetSlowPathFPWidth();
1020 }
1021 
1022 void CodeGeneratorX86::InvokeRuntime(QuickEntrypointEnum entrypoint,
1023                                      HInstruction* instruction,
1024                                      uint32_t dex_pc,
1025                                      SlowPathCode* slow_path) {
1026   ValidateInvokeRuntime(entrypoint, instruction, slow_path);
1027   GenerateInvokeRuntime(GetThreadOffset<kX86PointerSize>(entrypoint).Int32Value());
1028   if (EntrypointRequiresStackMap(entrypoint)) {
1029     RecordPcInfo(instruction, dex_pc, slow_path);
1030   }
1031 }
1032 
1033 void CodeGeneratorX86::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
1034                                                            HInstruction* instruction,
1035                                                            SlowPathCode* slow_path) {
1036   ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
1037   GenerateInvokeRuntime(entry_point_offset);
1038 }
1039 
1040 void CodeGeneratorX86::GenerateInvokeRuntime(int32_t entry_point_offset) {
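  // Quick entrypoints are reached through the Thread object, which managed x86 code
  // addresses via the FS segment register.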
1041   __ fs()->call(Address::Absolute(entry_point_offset));
1042 }
1043 
1044 CodeGeneratorX86::CodeGeneratorX86(HGraph* graph,
1045                                    const CompilerOptions& compiler_options,
1046                                    OptimizingCompilerStats* stats)
1047     : CodeGenerator(graph,
1048                     kNumberOfCpuRegisters,
1049                     kNumberOfXmmRegisters,
1050                     kNumberOfRegisterPairs,
1051                     ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
1052                                         arraysize(kCoreCalleeSaves))
1053                         | (1 << kFakeReturnRegister),
1054                     0,
1055                     compiler_options,
1056                     stats),
1057       block_labels_(nullptr),
1058       location_builder_(graph, this),
1059       instruction_visitor_(graph, this),
1060       move_resolver_(graph->GetAllocator(), this),
1061       assembler_(graph->GetAllocator()),
1062       boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1063       method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1064       boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1065       type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1066       public_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1067       package_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1068       boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1069       string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1070       boot_image_jni_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1071       boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1072       jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1073       jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1074       constant_area_start_(-1),
1075       fixups_to_jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1076       method_address_offset_(std::less<uint32_t>(),
1077                              graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
1078   // Use a fake return address register to mimic Quick.
1079   AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
1080 }
1081 
1082 void CodeGeneratorX86::SetupBlockedRegisters() const {
1083   // Stack register is always reserved.
1084   blocked_core_registers_[ESP] = true;
1085 }
1086 
1087 InstructionCodeGeneratorX86::InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen)
1088       : InstructionCodeGenerator(graph, codegen),
1089         assembler_(codegen->GetAssembler()),
1090         codegen_(codegen) {}
1091 
1092 static dwarf::Reg DWARFReg(Register reg) {
1093   return dwarf::Reg::X86Core(static_cast<int>(reg));
1094 }
1095 
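// Bumps the method's hotness counter (and, for baseline compilation, the ProfilingInfo
// counter); when the baseline counter wraps, the kQuickCompileOptimized entrypoint is
// called to request optimized (re)compilation.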
1096 void CodeGeneratorX86::MaybeIncrementHotness(bool is_frame_entry) {
1097   if (GetCompilerOptions().CountHotnessInCompiledCode()) {
1098     Register reg = EAX;
1099     if (is_frame_entry) {
1100       reg = kMethodRegisterArgument;
1101     } else {
1102       __ pushl(EAX);
1103       __ cfi().AdjustCFAOffset(4);
1104       __ movl(EAX, Address(ESP, kX86WordSize));
1105     }
1106     NearLabel overflow;
1107     __ cmpw(Address(reg, ArtMethod::HotnessCountOffset().Int32Value()),
1108             Immediate(ArtMethod::MaxCounter()));
1109     __ j(kEqual, &overflow);
1110     __ addw(Address(reg, ArtMethod::HotnessCountOffset().Int32Value()),
1111             Immediate(1));
1112     __ Bind(&overflow);
1113     if (!is_frame_entry) {
1114       __ popl(EAX);
1115       __ cfi().AdjustCFAOffset(-4);
1116     }
1117   }
1118 
1119   if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
1120     ScopedProfilingInfoUse spiu(
1121         Runtime::Current()->GetJit(), GetGraph()->GetArtMethod(), Thread::Current());
1122     ProfilingInfo* info = spiu.GetProfilingInfo();
1123     if (info != nullptr) {
1124       uint32_t address = reinterpret_cast32<uint32_t>(info);
1125       NearLabel done;
1126       if (HasEmptyFrame()) {
1127         CHECK(is_frame_entry);
1128         // Alignment
1129         IncreaseFrame(8);
1130         // We need a temporary. The stub also expects the method at bottom of stack.
1131         __ pushl(EAX);
1132         __ cfi().AdjustCFAOffset(4);
1133         __ movl(EAX, Immediate(address));
1134         __ addw(Address(EAX, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()),
1135                 Immediate(1));
1136         __ andw(Address(EAX, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()),
1137                 Immediate(interpreter::kTieredHotnessMask));
1138         __ j(kNotZero, &done);
1139         GenerateInvokeRuntime(
1140             GetThreadOffset<kX86PointerSize>(kQuickCompileOptimized).Int32Value());
1141         __ Bind(&done);
1142         // We don't strictly need to restore EAX, but this makes the generated
1143         // code easier to reason about.
1144         __ popl(EAX);
1145         __ cfi().AdjustCFAOffset(-4);
1146         DecreaseFrame(8);
1147       } else {
1148         if (!RequiresCurrentMethod()) {
1149           CHECK(is_frame_entry);
1150           __ movl(Address(ESP, kCurrentMethodStackOffset), kMethodRegisterArgument);
1151         }
1152         // We need a temporary.
1153         __ pushl(EAX);
1154         __ cfi().AdjustCFAOffset(4);
1155         __ movl(EAX, Immediate(address));
1156         __ addw(Address(EAX, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()),
1157                 Immediate(1));
1158         __ popl(EAX);  // Put stack as expected before exiting or calling stub.
1159         __ cfi().AdjustCFAOffset(-4);
1160         __ j(kCarryClear, &done);
1161         GenerateInvokeRuntime(
1162             GetThreadOffset<kX86PointerSize>(kQuickCompileOptimized).Int32Value());
1163         __ Bind(&done);
1164       }
1165     }
1166   }
1167 }
1168 
1169 void CodeGeneratorX86::GenerateFrameEntry() {
1170   __ cfi().SetCurrentCFAOffset(kX86WordSize);  // return address
1171   __ Bind(&frame_entry_label_);
1172   bool skip_overflow_check =
1173       IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86);
1174   DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
1175 
1176   if (!skip_overflow_check) {
1177     size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86);
1178     __ testl(EAX, Address(ESP, -static_cast<int32_t>(reserved_bytes)));
1179     RecordPcInfo(nullptr, 0);
1180   }
1181 
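  // Spill only the callee-save core registers that the register allocator actually
  // used, then reserve the rest of the frame with a single ESP adjustment. CFI is
  // updated at each step so the frame can be unwound from any point in the prologue.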
1182   if (!HasEmptyFrame()) {
1183     for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
1184       Register reg = kCoreCalleeSaves[i];
1185       if (allocated_registers_.ContainsCoreRegister(reg)) {
1186         __ pushl(reg);
1187         __ cfi().AdjustCFAOffset(kX86WordSize);
1188         __ cfi().RelOffset(DWARFReg(reg), 0);
1189       }
1190     }
1191 
1192     int adjust = GetFrameSize() - FrameEntrySpillSize();
1193     IncreaseFrame(adjust);
1194     // Save the current method if we need it. Note that we do not
1195     // do this in HCurrentMethod, as the instruction might have been removed
1196     // in the SSA graph.
1197     if (RequiresCurrentMethod()) {
1198       __ movl(Address(ESP, kCurrentMethodStackOffset), kMethodRegisterArgument);
1199     }
1200 
1201     if (GetGraph()->HasShouldDeoptimizeFlag()) {
1202       // Initialize should_deoptimize flag to 0.
1203       __ movl(Address(ESP, GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
1204     }
1205   }
1206 
1207   MaybeIncrementHotness(/* is_frame_entry= */ true);
1208 }
1209 
GenerateFrameExit()1210 void CodeGeneratorX86::GenerateFrameExit() {
1211   __ cfi().RememberState();
1212   if (!HasEmptyFrame()) {
1213     int adjust = GetFrameSize() - FrameEntrySpillSize();
1214     DecreaseFrame(adjust);
1215 
1216     for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
1217       Register reg = kCoreCalleeSaves[i];
1218       if (allocated_registers_.ContainsCoreRegister(reg)) {
1219         __ popl(reg);
1220         __ cfi().AdjustCFAOffset(-static_cast<int>(kX86WordSize));
1221         __ cfi().Restore(DWARFReg(reg));
1222       }
1223     }
1224   }
1225   __ ret();
1226   __ cfi().RestoreState();
1227   __ cfi().DefCFAOffset(GetFrameSize());
1228 }
1229 
Bind(HBasicBlock * block)1230 void CodeGeneratorX86::Bind(HBasicBlock* block) {
1231   __ Bind(GetLabelOf(block));
1232 }
1233 
GetReturnLocation(DataType::Type type) const1234 Location InvokeDexCallingConventionVisitorX86::GetReturnLocation(DataType::Type type) const {
1235   switch (type) {
1236     case DataType::Type::kReference:
1237     case DataType::Type::kBool:
1238     case DataType::Type::kUint8:
1239     case DataType::Type::kInt8:
1240     case DataType::Type::kUint16:
1241     case DataType::Type::kInt16:
1242     case DataType::Type::kUint32:
1243     case DataType::Type::kInt32:
1244       return Location::RegisterLocation(EAX);
1245 
1246     case DataType::Type::kUint64:
1247     case DataType::Type::kInt64:
1248       return Location::RegisterPairLocation(EAX, EDX);
1249 
1250     case DataType::Type::kVoid:
1251       return Location::NoLocation();
1252 
1253     case DataType::Type::kFloat64:
1254     case DataType::Type::kFloat32:
1255       return Location::FpuRegisterLocation(XMM0);
1256   }
1257 
1258   UNREACHABLE();
1259 }
1260 
GetMethodLocation() const1261 Location InvokeDexCallingConventionVisitorX86::GetMethodLocation() const {
1262   return Location::RegisterLocation(kMethodRegisterArgument);
1263 }
1264 
GetNextLocation(DataType::Type type)1265 Location InvokeDexCallingConventionVisitorX86::GetNextLocation(DataType::Type type) {
1266   switch (type) {
1267     case DataType::Type::kReference:
1268     case DataType::Type::kBool:
1269     case DataType::Type::kUint8:
1270     case DataType::Type::kInt8:
1271     case DataType::Type::kUint16:
1272     case DataType::Type::kInt16:
1273     case DataType::Type::kInt32: {
1274       uint32_t index = gp_index_++;
1275       stack_index_++;
1276       if (index < calling_convention.GetNumberOfRegisters()) {
1277         return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
1278       } else {
1279         return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
1280       }
1281     }
1282 
1283     case DataType::Type::kInt64: {
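      // A long needs a full register pair; if fewer than two argument registers are
      // left, the value is passed entirely on the stack rather than being split.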
1284       uint32_t index = gp_index_;
1285       gp_index_ += 2;
1286       stack_index_ += 2;
1287       if (index + 1 < calling_convention.GetNumberOfRegisters()) {
1288         X86ManagedRegister pair = X86ManagedRegister::FromRegisterPair(
1289             calling_convention.GetRegisterPairAt(index));
1290         return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh());
1291       } else {
1292         return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
1293       }
1294     }
1295 
1296     case DataType::Type::kFloat32: {
1297       uint32_t index = float_index_++;
1298       stack_index_++;
1299       if (index < calling_convention.GetNumberOfFpuRegisters()) {
1300         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
1301       } else {
1302         return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
1303       }
1304     }
1305 
1306     case DataType::Type::kFloat64: {
1307       uint32_t index = float_index_++;
1308       stack_index_ += 2;
1309       if (index < calling_convention.GetNumberOfFpuRegisters()) {
1310         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
1311       } else {
1312         return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
1313       }
1314     }
1315 
1316     case DataType::Type::kUint32:
1317     case DataType::Type::kUint64:
1318     case DataType::Type::kVoid:
1319       LOG(FATAL) << "Unexpected parameter type " << type;
1320       UNREACHABLE();
1321   }
1322   return Location::NoLocation();
1323 }
1324 
GetNextLocation(DataType::Type type)1325 Location CriticalNativeCallingConventionVisitorX86::GetNextLocation(DataType::Type type) {
1326   DCHECK_NE(type, DataType::Type::kReference);
1327 
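  // With the @CriticalNative convention on x86 every argument is passed on the stack
  // (64-bit values take two consecutive slots). For register allocation we report
  // Location::Any(); the concrete stack slots are used when the call is emitted.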
1328   Location location;
1329   if (DataType::Is64BitType(type)) {
1330     location = Location::DoubleStackSlot(stack_offset_);
1331     stack_offset_ += 2 * kFramePointerSize;
1332   } else {
1333     location = Location::StackSlot(stack_offset_);
1334     stack_offset_ += kFramePointerSize;
1335   }
1336   if (for_register_allocation_) {
1337     location = Location::Any();
1338   }
1339   return location;
1340 }
1341 
GetReturnLocation(DataType::Type type) const1342 Location CriticalNativeCallingConventionVisitorX86::GetReturnLocation(DataType::Type type) const {
1343   // We perform conversion to the managed ABI return register after the call if needed.
1344   InvokeDexCallingConventionVisitorX86 dex_calling_convention;
1345   return dex_calling_convention.GetReturnLocation(type);
1346 }
1347 
GetMethodLocation() const1348 Location CriticalNativeCallingConventionVisitorX86::GetMethodLocation() const {
1349   // Pass the method in the hidden argument EAX.
1350   return Location::RegisterLocation(EAX);
1351 }
1352 
Move32(Location destination,Location source)1353 void CodeGeneratorX86::Move32(Location destination, Location source) {
1354   if (source.Equals(destination)) {
1355     return;
1356   }
1357   if (destination.IsRegister()) {
1358     if (source.IsRegister()) {
1359       __ movl(destination.AsRegister<Register>(), source.AsRegister<Register>());
1360     } else if (source.IsFpuRegister()) {
1361       __ movd(destination.AsRegister<Register>(), source.AsFpuRegister<XmmRegister>());
1362     } else if (source.IsConstant()) {
1363       int32_t value = GetInt32ValueOf(source.GetConstant());
1364       __ movl(destination.AsRegister<Register>(), Immediate(value));
1365     } else {
1366       DCHECK(source.IsStackSlot());
1367       __ movl(destination.AsRegister<Register>(), Address(ESP, source.GetStackIndex()));
1368     }
1369   } else if (destination.IsFpuRegister()) {
1370     if (source.IsRegister()) {
1371       __ movd(destination.AsFpuRegister<XmmRegister>(), source.AsRegister<Register>());
1372     } else if (source.IsFpuRegister()) {
1373       __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
1374     } else {
1375       DCHECK(source.IsStackSlot());
1376       __ movss(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
1377     }
1378   } else {
1379     DCHECK(destination.IsStackSlot()) << destination;
1380     if (source.IsRegister()) {
1381       __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegister<Register>());
1382     } else if (source.IsFpuRegister()) {
1383       __ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
1384     } else if (source.IsConstant()) {
1385       HConstant* constant = source.GetConstant();
1386       int32_t value = GetInt32ValueOf(constant);
1387       __ movl(Address(ESP, destination.GetStackIndex()), Immediate(value));
1388     } else {
1389       DCHECK(source.IsStackSlot());
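      // Stack-to-stack move with no free register: push the source slot and pop it
      // straight into the destination slot.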
1390       __ pushl(Address(ESP, source.GetStackIndex()));
1391       __ popl(Address(ESP, destination.GetStackIndex()));
1392     }
1393   }
1394 }
1395 
Move64(Location destination,Location source)1396 void CodeGeneratorX86::Move64(Location destination, Location source) {
1397   if (source.Equals(destination)) {
1398     return;
1399   }
1400   if (destination.IsRegisterPair()) {
1401     if (source.IsRegisterPair()) {
1402       EmitParallelMoves(
1403           Location::RegisterLocation(source.AsRegisterPairHigh<Register>()),
1404           Location::RegisterLocation(destination.AsRegisterPairHigh<Register>()),
1405           DataType::Type::kInt32,
1406           Location::RegisterLocation(source.AsRegisterPairLow<Register>()),
1407           Location::RegisterLocation(destination.AsRegisterPairLow<Register>()),
1408           DataType::Type::kInt32);
1409     } else if (source.IsFpuRegister()) {
1410       XmmRegister src_reg = source.AsFpuRegister<XmmRegister>();
1411       __ movd(destination.AsRegisterPairLow<Register>(), src_reg);
1412       __ psrlq(src_reg, Immediate(32));
1413       __ movd(destination.AsRegisterPairHigh<Register>(), src_reg);
1414     } else {
1415       // No conflict possible, so just do the moves.
1416       DCHECK(source.IsDoubleStackSlot());
1417       __ movl(destination.AsRegisterPairLow<Register>(), Address(ESP, source.GetStackIndex()));
1418       __ movl(destination.AsRegisterPairHigh<Register>(),
1419               Address(ESP, source.GetHighStackIndex(kX86WordSize)));
1420     }
1421   } else if (destination.IsFpuRegister()) {
1422     if (source.IsFpuRegister()) {
1423       __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
1424     } else if (source.IsDoubleStackSlot()) {
1425       __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
1426     } else if (source.IsRegisterPair()) {
1427       size_t elem_size = DataType::Size(DataType::Type::kInt32);
1428       // Push the 2 source registers to the stack.
1429       __ pushl(source.AsRegisterPairHigh<Register>());
1430       __ cfi().AdjustCFAOffset(elem_size);
1431       __ pushl(source.AsRegisterPairLow<Register>());
1432       __ cfi().AdjustCFAOffset(elem_size);
1433       __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
1434       // And remove the temporary stack space we allocated.
1435       DecreaseFrame(2 * elem_size);
1436     } else {
1437       LOG(FATAL) << "Unimplemented";
1438     }
1439   } else {
1440     DCHECK(destination.IsDoubleStackSlot()) << destination;
1441     if (source.IsRegisterPair()) {
1442       // No conflict possible, so just do the moves.
1443       __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegisterPairLow<Register>());
1444       __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
1445               source.AsRegisterPairHigh<Register>());
1446     } else if (source.IsFpuRegister()) {
1447       __ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
1448     } else if (source.IsConstant()) {
1449       HConstant* constant = source.GetConstant();
1450       DCHECK(constant->IsLongConstant() || constant->IsDoubleConstant());
1451       int64_t value = GetInt64ValueOf(constant);
1452       __ movl(Address(ESP, destination.GetStackIndex()), Immediate(Low32Bits(value)));
1453       __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
1454               Immediate(High32Bits(value)));
1455     } else {
1456       DCHECK(source.IsDoubleStackSlot()) << source;
1457       EmitParallelMoves(
1458           Location::StackSlot(source.GetStackIndex()),
1459           Location::StackSlot(destination.GetStackIndex()),
1460           DataType::Type::kInt32,
1461           Location::StackSlot(source.GetHighStackIndex(kX86WordSize)),
1462           Location::StackSlot(destination.GetHighStackIndex(kX86WordSize)),
1463           DataType::Type::kInt32);
1464     }
1465   }
1466 }
1467 
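// Builds an x86 addressing mode [base + index * scale + disp]; without an index
// register this is simply [base + disp]. For example, CreateAddress(ESP, ECX, TIMES_4, 16)
// describes the operand [ESP + ECX*4 + 16].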
CreateAddress(Register base,Register index=Register::kNoRegister,ScaleFactor scale=TIMES_1,int32_t disp=0)1468 static Address CreateAddress(Register base,
1469                              Register index = Register::kNoRegister,
1470                              ScaleFactor scale = TIMES_1,
1471                              int32_t disp = 0) {
1472   if (index == Register::kNoRegister) {
1473     return Address(base, disp);
1474   }
1475 
1476   return Address(base, index, scale, disp);
1477 }
1478 
LoadFromMemoryNoBarrier(DataType::Type dst_type,Location dst,Address src,XmmRegister temp,bool is_atomic_load)1479 void CodeGeneratorX86::LoadFromMemoryNoBarrier(DataType::Type dst_type,
1480                                                Location dst,
1481                                                Address src,
1482                                                XmmRegister temp,
1483                                                bool is_atomic_load) {
1484   switch (dst_type) {
1485     case DataType::Type::kBool:
1486     case DataType::Type::kUint8:
1487       __ movzxb(dst.AsRegister<Register>(), src);
1488       break;
1489     case DataType::Type::kInt8:
1490       __ movsxb(dst.AsRegister<Register>(), src);
1491       break;
1492     case DataType::Type::kInt16:
1493       __ movsxw(dst.AsRegister<Register>(), src);
1494       break;
1495     case DataType::Type::kUint16:
1496       __ movzxw(dst.AsRegister<Register>(), src);
1497       break;
1498     case DataType::Type::kInt32:
1499       __ movl(dst.AsRegister<Register>(), src);
1500       break;
1501     case DataType::Type::kInt64: {
1502       if (is_atomic_load) {
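        // Two separate 32-bit loads would not be atomic on x86-32, so load the whole
        // value with an SSE movsd and then split it into the register pair.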
1503         __ movsd(temp, src);
1504         __ movd(dst.AsRegisterPairLow<Register>(), temp);
1505         __ psrlq(temp, Immediate(32));
1506         __ movd(dst.AsRegisterPairHigh<Register>(), temp);
1507       } else {
1508         DCHECK_NE(src.GetBaseRegister(), dst.AsRegisterPairLow<Register>());
1509         Address src_high = src.displaceBy(kX86WordSize);
1510         __ movl(dst.AsRegisterPairLow<Register>(), src);
1511         __ movl(dst.AsRegisterPairHigh<Register>(), src_high);
1512       }
1513       break;
1514     }
1515     case DataType::Type::kFloat32:
1516       __ movss(dst.AsFpuRegister<XmmRegister>(), src);
1517       break;
1518     case DataType::Type::kFloat64:
1519       __ movsd(dst.AsFpuRegister<XmmRegister>(), src);
1520       break;
1521     case DataType::Type::kReference:
1522       __ movl(dst.AsRegister<Register>(), src);
1523       __ MaybeUnpoisonHeapReference(dst.AsRegister<Register>());
1524       break;
1525     default:
1526       LOG(FATAL) << "Unreachable type " << dst_type;
1527   }
1528 }
1529 
MoveToMemory(DataType::Type src_type,Location src,Register dst_base,Register dst_index,ScaleFactor dst_scale,int32_t dst_disp)1530 void CodeGeneratorX86::MoveToMemory(DataType::Type src_type,
1531                                     Location src,
1532                                     Register dst_base,
1533                                     Register dst_index,
1534                                     ScaleFactor dst_scale,
1535                                     int32_t dst_disp) {
1536   DCHECK(dst_base != Register::kNoRegister);
1537   Address dst = CreateAddress(dst_base, dst_index, dst_scale, dst_disp);
1538 
1539   switch (src_type) {
1540     case DataType::Type::kBool:
1541     case DataType::Type::kUint8:
1542     case DataType::Type::kInt8: {
1543       if (src.IsConstant()) {
1544         __ movb(dst, Immediate(CodeGenerator::GetInt8ValueOf(src.GetConstant())));
1545       } else {
1546         __ movb(dst, src.AsRegister<ByteRegister>());
1547       }
1548       break;
1549     }
1550     case DataType::Type::kUint16:
1551     case DataType::Type::kInt16: {
1552       if (src.IsConstant()) {
1553         __ movw(dst, Immediate(CodeGenerator::GetInt16ValueOf(src.GetConstant())));
1554       } else {
1555         __ movw(dst, src.AsRegister<Register>());
1556       }
1557       break;
1558     }
1559     case DataType::Type::kUint32:
1560     case DataType::Type::kInt32: {
1561       if (src.IsConstant()) {
1562         int32_t v = CodeGenerator::GetInt32ValueOf(src.GetConstant());
1563         __ movl(dst, Immediate(v));
1564       } else {
1565         __ movl(dst, src.AsRegister<Register>());
1566       }
1567       break;
1568     }
1569     case DataType::Type::kUint64:
1570     case DataType::Type::kInt64: {
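      // A 64-bit store is emitted as two 32-bit moves: the low word at `dst` and the
      // high word four bytes above it (little-endian layout).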
1571       Address dst_next_4_bytes = CreateAddress(dst_base, dst_index, dst_scale, dst_disp + 4);
1572       if (src.IsConstant()) {
1573         int64_t v = CodeGenerator::GetInt64ValueOf(src.GetConstant());
1574         __ movl(dst, Immediate(Low32Bits(v)));
1575         __ movl(dst_next_4_bytes, Immediate(High32Bits(v)));
1576       } else {
1577         __ movl(dst, src.AsRegisterPairLow<Register>());
1578         __ movl(dst_next_4_bytes, src.AsRegisterPairHigh<Register>());
1579       }
1580       break;
1581     }
1582     case DataType::Type::kFloat32: {
1583       if (src.IsConstant()) {
1584         int32_t v = CodeGenerator::GetInt32ValueOf(src.GetConstant());
1585         __ movl(dst, Immediate(v));
1586       } else {
1587         __ movss(dst, src.AsFpuRegister<XmmRegister>());
1588       }
1589       break;
1590     }
1591     case DataType::Type::kFloat64: {
1592       Address dst_next_4_bytes = CreateAddress(dst_base, dst_index, dst_scale, dst_disp + 4);
1593       if (src.IsConstant()) {
1594         int64_t v = CodeGenerator::GetInt64ValueOf(src.GetConstant());
1595         __ movl(dst, Immediate(Low32Bits(v)));
1596         __ movl(dst_next_4_bytes, Immediate(High32Bits(v)));
1597       } else {
1598         __ movsd(dst, src.AsFpuRegister<XmmRegister>());
1599       }
1600       break;
1601     }
1602     case DataType::Type::kVoid:
1603     case DataType::Type::kReference:
1604       LOG(FATAL) << "Unreachable type " << src_type;
1605   }
1606 }
1607 
MoveConstant(Location location,int32_t value)1608 void CodeGeneratorX86::MoveConstant(Location location, int32_t value) {
1609   DCHECK(location.IsRegister());
1610   __ movl(location.AsRegister<Register>(), Immediate(value));
1611 }
1612 
MoveLocation(Location dst,Location src,DataType::Type dst_type)1613 void CodeGeneratorX86::MoveLocation(Location dst, Location src, DataType::Type dst_type) {
1614   HParallelMove move(GetGraph()->GetAllocator());
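  // A 64-bit move between register pairs or stack slots is split into two 32-bit
  // moves so the parallel move resolver can handle overlapping halves; constant and
  // FP sources are moved as a single unit.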
1615   if (dst_type == DataType::Type::kInt64 && !src.IsConstant() && !src.IsFpuRegister()) {
1616     move.AddMove(src.ToLow(), dst.ToLow(), DataType::Type::kInt32, nullptr);
1617     move.AddMove(src.ToHigh(), dst.ToHigh(), DataType::Type::kInt32, nullptr);
1618   } else {
1619     move.AddMove(src, dst, dst_type, nullptr);
1620   }
1621   GetMoveResolver()->EmitNativeCode(&move);
1622 }
1623 
AddLocationAsTemp(Location location,LocationSummary * locations)1624 void CodeGeneratorX86::AddLocationAsTemp(Location location, LocationSummary* locations) {
1625   if (location.IsRegister()) {
1626     locations->AddTemp(location);
1627   } else if (location.IsRegisterPair()) {
1628     locations->AddTemp(Location::RegisterLocation(location.AsRegisterPairLow<Register>()));
1629     locations->AddTemp(Location::RegisterLocation(location.AsRegisterPairHigh<Register>()));
1630   } else {
1631     UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
1632   }
1633 }
1634 
HandleGoto(HInstruction * got,HBasicBlock * successor)1635 void InstructionCodeGeneratorX86::HandleGoto(HInstruction* got, HBasicBlock* successor) {
1636   if (successor->IsExitBlock()) {
1637     DCHECK(got->GetPrevious()->AlwaysThrows());
1638     return;  // no code needed
1639   }
1640 
1641   HBasicBlock* block = got->GetBlock();
1642   HInstruction* previous = got->GetPrevious();
1643 
1644   HLoopInformation* info = block->GetLoopInformation();
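  // A loop back edge doubles as a safepoint: bump the hotness counters and emit the
  // suspend check, which also takes care of branching back to `successor`.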
1645   if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
1646     codegen_->MaybeIncrementHotness(/* is_frame_entry= */ false);
1647     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
1648     return;
1649   }
1650 
1651   if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
1652     GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
1653   }
1654   if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
1655     __ jmp(codegen_->GetLabelOf(successor));
1656   }
1657 }
1658 
VisitGoto(HGoto * got)1659 void LocationsBuilderX86::VisitGoto(HGoto* got) {
1660   got->SetLocations(nullptr);
1661 }
1662 
VisitGoto(HGoto * got)1663 void InstructionCodeGeneratorX86::VisitGoto(HGoto* got) {
1664   HandleGoto(got, got->GetSuccessor());
1665 }
1666 
VisitTryBoundary(HTryBoundary * try_boundary)1667 void LocationsBuilderX86::VisitTryBoundary(HTryBoundary* try_boundary) {
1668   try_boundary->SetLocations(nullptr);
1669 }
1670 
VisitTryBoundary(HTryBoundary * try_boundary)1671 void InstructionCodeGeneratorX86::VisitTryBoundary(HTryBoundary* try_boundary) {
1672   HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
1673   if (!successor->IsExitBlock()) {
1674     HandleGoto(try_boundary, successor);
1675   }
1676 }
1677 
VisitExit(HExit * exit)1678 void LocationsBuilderX86::VisitExit(HExit* exit) {
1679   exit->SetLocations(nullptr);
1680 }
1681 
VisitExit(HExit * exit ATTRIBUTE_UNUSED)1682 void InstructionCodeGeneratorX86::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
1683 }
1684 
1685 template<class LabelType>
GenerateFPJumps(HCondition * cond,LabelType * true_label,LabelType * false_label)1686 void InstructionCodeGeneratorX86::GenerateFPJumps(HCondition* cond,
1687                                                   LabelType* true_label,
1688                                                   LabelType* false_label) {
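  // ucomiss/ucomisd report a NaN operand as "unordered"; dispatch that outcome first
  // so the remaining jump can treat the comparison like an unsigned integer one.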
1689   if (cond->IsFPConditionTrueIfNaN()) {
1690     __ j(kUnordered, true_label);
1691   } else if (cond->IsFPConditionFalseIfNaN()) {
1692     __ j(kUnordered, false_label);
1693   }
1694   __ j(X86UnsignedOrFPCondition(cond->GetCondition()), true_label);
1695 }
1696 
1697 template<class LabelType>
GenerateLongComparesAndJumps(HCondition * cond,LabelType * true_label,LabelType * false_label)1698 void InstructionCodeGeneratorX86::GenerateLongComparesAndJumps(HCondition* cond,
1699                                                                LabelType* true_label,
1700                                                                LabelType* false_label) {
1701   LocationSummary* locations = cond->GetLocations();
1702   Location left = locations->InAt(0);
1703   Location right = locations->InAt(1);
1704   IfCondition if_cond = cond->GetCondition();
1705 
1706   Register left_high = left.AsRegisterPairHigh<Register>();
1707   Register left_low = left.AsRegisterPairLow<Register>();
1708   IfCondition true_high_cond = if_cond;
1709   IfCondition false_high_cond = cond->GetOppositeCondition();
1710   Condition final_condition = X86UnsignedOrFPCondition(if_cond);  // unsigned on lower part
1711 
1712   // Set the conditions for the test, remembering that == needs to be
1713   // decided using the low words.
1714   switch (if_cond) {
1715     case kCondEQ:
1716     case kCondNE:
1717       // Nothing to do.
1718       break;
1719     case kCondLT:
1720       false_high_cond = kCondGT;
1721       break;
1722     case kCondLE:
1723       true_high_cond = kCondLT;
1724       break;
1725     case kCondGT:
1726       false_high_cond = kCondLT;
1727       break;
1728     case kCondGE:
1729       true_high_cond = kCondGT;
1730       break;
1731     case kCondB:
1732       false_high_cond = kCondA;
1733       break;
1734     case kCondBE:
1735       true_high_cond = kCondB;
1736       break;
1737     case kCondA:
1738       false_high_cond = kCondB;
1739       break;
1740     case kCondAE:
1741       true_high_cond = kCondA;
1742       break;
1743   }
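  // Example: for a signed <= (kCondLE) the high words decide with < (strictly less
  // means true, strictly greater means false); only when the high words are equal do
  // we fall through and compare the low words unsigned.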
1744 
1745   if (right.IsConstant()) {
1746     int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
1747     int32_t val_high = High32Bits(value);
1748     int32_t val_low = Low32Bits(value);
1749 
1750     codegen_->Compare32BitValue(left_high, val_high);
1751     if (if_cond == kCondNE) {
1752       __ j(X86Condition(true_high_cond), true_label);
1753     } else if (if_cond == kCondEQ) {
1754       __ j(X86Condition(false_high_cond), false_label);
1755     } else {
1756       __ j(X86Condition(true_high_cond), true_label);
1757       __ j(X86Condition(false_high_cond), false_label);
1758     }
1759     // Must be equal high, so compare the lows.
1760     codegen_->Compare32BitValue(left_low, val_low);
1761   } else if (right.IsRegisterPair()) {
1762     Register right_high = right.AsRegisterPairHigh<Register>();
1763     Register right_low = right.AsRegisterPairLow<Register>();
1764 
1765     __ cmpl(left_high, right_high);
1766     if (if_cond == kCondNE) {
1767       __ j(X86Condition(true_high_cond), true_label);
1768     } else if (if_cond == kCondEQ) {
1769       __ j(X86Condition(false_high_cond), false_label);
1770     } else {
1771       __ j(X86Condition(true_high_cond), true_label);
1772       __ j(X86Condition(false_high_cond), false_label);
1773     }
1774     // Must be equal high, so compare the lows.
1775     __ cmpl(left_low, right_low);
1776   } else {
1777     DCHECK(right.IsDoubleStackSlot());
1778     __ cmpl(left_high, Address(ESP, right.GetHighStackIndex(kX86WordSize)));
1779     if (if_cond == kCondNE) {
1780       __ j(X86Condition(true_high_cond), true_label);
1781     } else if (if_cond == kCondEQ) {
1782       __ j(X86Condition(false_high_cond), false_label);
1783     } else {
1784       __ j(X86Condition(true_high_cond), true_label);
1785       __ j(X86Condition(false_high_cond), false_label);
1786     }
1787     // Must be equal high, so compare the lows.
1788     __ cmpl(left_low, Address(ESP, right.GetStackIndex()));
1789   }
1790   // The last comparison might be unsigned.
1791   __ j(final_condition, true_label);
1792 }
1793 
GenerateFPCompare(Location lhs,Location rhs,HInstruction * insn,bool is_double)1794 void InstructionCodeGeneratorX86::GenerateFPCompare(Location lhs,
1795                                                     Location rhs,
1796                                                     HInstruction* insn,
1797                                                     bool is_double) {
1798   HX86LoadFromConstantTable* const_area = insn->InputAt(1)->AsX86LoadFromConstantTable();
1799   if (is_double) {
1800     if (rhs.IsFpuRegister()) {
1801       __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>());
1802     } else if (const_area != nullptr) {
1803       DCHECK(const_area->IsEmittedAtUseSite());
1804       __ ucomisd(lhs.AsFpuRegister<XmmRegister>(),
1805                  codegen_->LiteralDoubleAddress(
1806                      const_area->GetConstant()->AsDoubleConstant()->GetValue(),
1807                      const_area->GetBaseMethodAddress(),
1808                      const_area->GetLocations()->InAt(0).AsRegister<Register>()));
1809     } else {
1810       DCHECK(rhs.IsDoubleStackSlot());
1811       __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), Address(ESP, rhs.GetStackIndex()));
1812     }
1813   } else {
1814     if (rhs.IsFpuRegister()) {
1815       __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>());
1816     } else if (const_area != nullptr) {
1817       DCHECK(const_area->IsEmittedAtUseSite());
1818       __ ucomiss(lhs.AsFpuRegister<XmmRegister>(),
1819                  codegen_->LiteralFloatAddress(
1820                      const_area->GetConstant()->AsFloatConstant()->GetValue(),
1821                      const_area->GetBaseMethodAddress(),
1822                      const_area->GetLocations()->InAt(0).AsRegister<Register>()));
1823     } else {
1824       DCHECK(rhs.IsStackSlot());
1825       __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), Address(ESP, rhs.GetStackIndex()));
1826     }
1827   }
1828 }
1829 
1830 template<class LabelType>
GenerateCompareTestAndBranch(HCondition * condition,LabelType * true_target_in,LabelType * false_target_in)1831 void InstructionCodeGeneratorX86::GenerateCompareTestAndBranch(HCondition* condition,
1832                                                                LabelType* true_target_in,
1833                                                                LabelType* false_target_in) {
1834   // Generated branching requires both targets to be explicit. If either of the
1835   // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead.
1836   LabelType fallthrough_target;
1837   LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
1838   LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
1839 
1840   LocationSummary* locations = condition->GetLocations();
1841   Location left = locations->InAt(0);
1842   Location right = locations->InAt(1);
1843 
1844   DataType::Type type = condition->InputAt(0)->GetType();
1845   switch (type) {
1846     case DataType::Type::kInt64:
1847       GenerateLongComparesAndJumps(condition, true_target, false_target);
1848       break;
1849     case DataType::Type::kFloat32:
1850       GenerateFPCompare(left, right, condition, false);
1851       GenerateFPJumps(condition, true_target, false_target);
1852       break;
1853     case DataType::Type::kFloat64:
1854       GenerateFPCompare(left, right, condition, true);
1855       GenerateFPJumps(condition, true_target, false_target);
1856       break;
1857     default:
1858       LOG(FATAL) << "Unexpected compare type " << type;
1859   }
1860 
1861   if (false_target != &fallthrough_target) {
1862     __ jmp(false_target);
1863   }
1864 
1865   if (fallthrough_target.IsLinked()) {
1866     __ Bind(&fallthrough_target);
1867   }
1868 }
1869 
AreEflagsSetFrom(HInstruction * cond,HInstruction * branch)1870 static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) {
1871   // Moves may clobber EFLAGS (moving zero uses xorl), so the flags set by `cond`
1872   // are only usable if `cond` is the instruction immediately before `branch`. We also
1873   // can't reuse EFLAGS for materialized long/FP conditions due to their complex branching.
1874   return cond->IsCondition() &&
1875          cond->GetNext() == branch &&
1876          cond->InputAt(0)->GetType() != DataType::Type::kInt64 &&
1877          !DataType::IsFloatingPointType(cond->InputAt(0)->GetType());
1878 }
1879 
1880 template<class LabelType>
GenerateTestAndBranch(HInstruction * instruction,size_t condition_input_index,LabelType * true_target,LabelType * false_target)1881 void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instruction,
1882                                                         size_t condition_input_index,
1883                                                         LabelType* true_target,
1884                                                         LabelType* false_target) {
1885   HInstruction* cond = instruction->InputAt(condition_input_index);
1886 
1887   if (true_target == nullptr && false_target == nullptr) {
1888     // Nothing to do. The code always falls through.
1889     return;
1890   } else if (cond->IsIntConstant()) {
1891     // Constant condition, statically compared against "true" (integer value 1).
1892     if (cond->AsIntConstant()->IsTrue()) {
1893       if (true_target != nullptr) {
1894         __ jmp(true_target);
1895       }
1896     } else {
1897       DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
1898       if (false_target != nullptr) {
1899         __ jmp(false_target);
1900       }
1901     }
1902     return;
1903   }
1904 
1905   // The following code generates these patterns:
1906   //  (1) true_target == nullptr && false_target != nullptr
1907   //        - opposite condition true => branch to false_target
1908   //  (2) true_target != nullptr && false_target == nullptr
1909   //        - condition true => branch to true_target
1910   //  (3) true_target != nullptr && false_target != nullptr
1911   //        - condition true => branch to true_target
1912   //        - branch to false_target
1913   if (IsBooleanValueOrMaterializedCondition(cond)) {
1914     if (AreEflagsSetFrom(cond, instruction)) {
1915       if (true_target == nullptr) {
1916         __ j(X86Condition(cond->AsCondition()->GetOppositeCondition()), false_target);
1917       } else {
1918         __ j(X86Condition(cond->AsCondition()->GetCondition()), true_target);
1919       }
1920     } else {
1921       // Materialized condition, compare against 0.
1922       Location lhs = instruction->GetLocations()->InAt(condition_input_index);
1923       if (lhs.IsRegister()) {
1924         __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>());
1925       } else {
1926         __ cmpl(Address(ESP, lhs.GetStackIndex()), Immediate(0));
1927       }
1928       if (true_target == nullptr) {
1929         __ j(kEqual, false_target);
1930       } else {
1931         __ j(kNotEqual, true_target);
1932       }
1933     }
1934   } else {
1935     // Condition has not been materialized, use its inputs as the comparison and
1936     // its condition as the branch condition.
1937     HCondition* condition = cond->AsCondition();
1938 
1939     // If this is a long or FP comparison that has been folded into
1940     // the HCondition, generate the comparison directly.
1941     DataType::Type type = condition->InputAt(0)->GetType();
1942     if (type == DataType::Type::kInt64 || DataType::IsFloatingPointType(type)) {
1943       GenerateCompareTestAndBranch(condition, true_target, false_target);
1944       return;
1945     }
1946 
1947     Location lhs = condition->GetLocations()->InAt(0);
1948     Location rhs = condition->GetLocations()->InAt(1);
1949     // LHS is guaranteed to be in a register (see LocationsBuilderX86::HandleCondition).
1950     codegen_->GenerateIntCompare(lhs, rhs);
1951     if (true_target == nullptr) {
1952       __ j(X86Condition(condition->GetOppositeCondition()), false_target);
1953     } else {
1954       __ j(X86Condition(condition->GetCondition()), true_target);
1955     }
1956   }
1957 
1958   // If neither branch falls through (case 3), the conditional branch to `true_target`
1959   // was already emitted (case 2) and we need to emit a jump to `false_target`.
1960   if (true_target != nullptr && false_target != nullptr) {
1961     __ jmp(false_target);
1962   }
1963 }
1964 
VisitIf(HIf * if_instr)1965 void LocationsBuilderX86::VisitIf(HIf* if_instr) {
1966   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
1967   if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
1968     locations->SetInAt(0, Location::Any());
1969   }
1970 }
1971 
VisitIf(HIf * if_instr)1972 void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) {
1973   HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
1974   HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
1975   Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
1976       nullptr : codegen_->GetLabelOf(true_successor);
1977   Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
1978       nullptr : codegen_->GetLabelOf(false_successor);
1979   GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
1980 }
1981 
VisitDeoptimize(HDeoptimize * deoptimize)1982 void LocationsBuilderX86::VisitDeoptimize(HDeoptimize* deoptimize) {
1983   LocationSummary* locations = new (GetGraph()->GetAllocator())
1984       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
1985   InvokeRuntimeCallingConvention calling_convention;
1986   RegisterSet caller_saves = RegisterSet::Empty();
1987   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1988   locations->SetCustomSlowPathCallerSaves(caller_saves);
1989   if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
1990     locations->SetInAt(0, Location::Any());
1991   }
1992 }
1993 
VisitDeoptimize(HDeoptimize * deoptimize)1994 void InstructionCodeGeneratorX86::VisitDeoptimize(HDeoptimize* deoptimize) {
1995   SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86>(deoptimize);
1996   GenerateTestAndBranch<Label>(deoptimize,
1997                                /* condition_input_index= */ 0,
1998                                slow_path->GetEntryLabel(),
1999                                /* false_target= */ nullptr);
2000 }
2001 
VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag * flag)2002 void LocationsBuilderX86::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
2003   LocationSummary* locations = new (GetGraph()->GetAllocator())
2004       LocationSummary(flag, LocationSummary::kNoCall);
2005   locations->SetOut(Location::RequiresRegister());
2006 }
2007 
VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag * flag)2008 void InstructionCodeGeneratorX86::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
2009   __ movl(flag->GetLocations()->Out().AsRegister<Register>(),
2010           Address(ESP, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
2011 }
2012 
SelectCanUseCMOV(HSelect * select)2013 static bool SelectCanUseCMOV(HSelect* select) {
2014   // There are no conditional move instructions for XMMs.
2015   if (DataType::IsFloatingPointType(select->GetType())) {
2016     return false;
2017   }
2018 
2019   // An FP condition doesn't generate the single CC that we need.
2020   // In 32-bit mode, a long condition doesn't generate a single CC either.
2021   HInstruction* condition = select->GetCondition();
2022   if (condition->IsCondition()) {
2023     DataType::Type compare_type = condition->InputAt(0)->GetType();
2024     if (compare_type == DataType::Type::kInt64 ||
2025         DataType::IsFloatingPointType(compare_type)) {
2026       return false;
2027     }
2028   }
2029 
2030   // We can generate a CMOV for this Select.
2031   return true;
2032 }
2033 
VisitSelect(HSelect * select)2034 void LocationsBuilderX86::VisitSelect(HSelect* select) {
2035   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select);
2036   if (DataType::IsFloatingPointType(select->GetType())) {
2037     locations->SetInAt(0, Location::RequiresFpuRegister());
2038     locations->SetInAt(1, Location::Any());
2039   } else {
2040     locations->SetInAt(0, Location::RequiresRegister());
2041     if (SelectCanUseCMOV(select)) {
2042       if (select->InputAt(1)->IsConstant()) {
2043         // Cmov can't handle a constant value.
2044         locations->SetInAt(1, Location::RequiresRegister());
2045       } else {
2046         locations->SetInAt(1, Location::Any());
2047       }
2048     } else {
2049       locations->SetInAt(1, Location::Any());
2050     }
2051   }
2052   if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
2053     locations->SetInAt(2, Location::RequiresRegister());
2054   }
2055   locations->SetOut(Location::SameAsFirstInput());
2056 }
2057 
VisitSelect(HSelect * select)2058 void InstructionCodeGeneratorX86::VisitSelect(HSelect* select) {
2059   LocationSummary* locations = select->GetLocations();
2060   DCHECK(locations->InAt(0).Equals(locations->Out()));
2061   if (SelectCanUseCMOV(select)) {
2062     // If both the condition and the source types are integer, we can generate
2063     // a CMOV to implement Select.
2064 
2065     HInstruction* select_condition = select->GetCondition();
2066     Condition cond = kNotEqual;
2067 
2068     // Figure out how to test the 'condition'.
2069     if (select_condition->IsCondition()) {
2070       HCondition* condition = select_condition->AsCondition();
2071       if (!condition->IsEmittedAtUseSite()) {
2072         // This was a previously materialized condition.
2073         // Can we use the existing condition code?
2074         if (AreEflagsSetFrom(condition, select)) {
2075           // Materialization was the previous instruction. Condition codes are right.
2076           cond = X86Condition(condition->GetCondition());
2077         } else {
2078           // No, we have to recreate the condition code.
2079           Register cond_reg = locations->InAt(2).AsRegister<Register>();
2080           __ testl(cond_reg, cond_reg);
2081         }
2082       } else {
2083         // We can't handle FP or long here.
2084         DCHECK_NE(condition->InputAt(0)->GetType(), DataType::Type::kInt64);
2085         DCHECK(!DataType::IsFloatingPointType(condition->InputAt(0)->GetType()));
2086         LocationSummary* cond_locations = condition->GetLocations();
2087         codegen_->GenerateIntCompare(cond_locations->InAt(0), cond_locations->InAt(1));
2088         cond = X86Condition(condition->GetCondition());
2089       }
2090     } else {
2091       // Must be a Boolean condition, which needs to be compared to 0.
2092       Register cond_reg = locations->InAt(2).AsRegister<Register>();
2093       __ testl(cond_reg, cond_reg);
2094     }
2095 
2096     // If the condition is true, overwrite the output, which already contains false.
2097     Location false_loc = locations->InAt(0);
2098     Location true_loc = locations->InAt(1);
2099     if (select->GetType() == DataType::Type::kInt64) {
2100       // 64 bit conditional move.
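      // cmovl moves only 32 bits, so a long select needs one cmov per half; cmov does
      // not modify EFLAGS, so the same condition can be reused for both halves.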
2101       Register false_high = false_loc.AsRegisterPairHigh<Register>();
2102       Register false_low = false_loc.AsRegisterPairLow<Register>();
2103       if (true_loc.IsRegisterPair()) {
2104         __ cmovl(cond, false_high, true_loc.AsRegisterPairHigh<Register>());
2105         __ cmovl(cond, false_low, true_loc.AsRegisterPairLow<Register>());
2106       } else {
2107         __ cmovl(cond, false_high, Address(ESP, true_loc.GetHighStackIndex(kX86WordSize)));
2108         __ cmovl(cond, false_low, Address(ESP, true_loc.GetStackIndex()));
2109       }
2110     } else {
2111       // 32 bit conditional move.
2112       Register false_reg = false_loc.AsRegister<Register>();
2113       if (true_loc.IsRegister()) {
2114         __ cmovl(cond, false_reg, true_loc.AsRegister<Register>());
2115       } else {
2116         __ cmovl(cond, false_reg, Address(ESP, true_loc.GetStackIndex()));
2117       }
2118     }
2119   } else {
2120     NearLabel false_target;
2121     GenerateTestAndBranch<NearLabel>(
2122         select, /* condition_input_index= */ 2, /* true_target= */ nullptr, &false_target);
2123     codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
2124     __ Bind(&false_target);
2125   }
2126 }
2127 
VisitNativeDebugInfo(HNativeDebugInfo * info)2128 void LocationsBuilderX86::VisitNativeDebugInfo(HNativeDebugInfo* info) {
2129   new (GetGraph()->GetAllocator()) LocationSummary(info);
2130 }
2131 
VisitNativeDebugInfo(HNativeDebugInfo *)2132 void InstructionCodeGeneratorX86::VisitNativeDebugInfo(HNativeDebugInfo*) {
2133   // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
2134 }
2135 
IncreaseFrame(size_t adjustment)2136 void CodeGeneratorX86::IncreaseFrame(size_t adjustment) {
2137   __ subl(ESP, Immediate(adjustment));
2138   __ cfi().AdjustCFAOffset(adjustment);
2139 }
2140 
DecreaseFrame(size_t adjustment)2141 void CodeGeneratorX86::DecreaseFrame(size_t adjustment) {
2142   __ addl(ESP, Immediate(adjustment));
2143   __ cfi().AdjustCFAOffset(-adjustment);
2144 }
2145 
GenerateNop()2146 void CodeGeneratorX86::GenerateNop() {
2147   __ nop();
2148 }
2149 
HandleCondition(HCondition * cond)2150 void LocationsBuilderX86::HandleCondition(HCondition* cond) {
2151   LocationSummary* locations =
2152       new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall);
2153   // Handle the long/FP comparisons made in instruction simplification.
2154   switch (cond->InputAt(0)->GetType()) {
2155     case DataType::Type::kInt64: {
2156       locations->SetInAt(0, Location::RequiresRegister());
2157       locations->SetInAt(1, Location::Any());
2158       if (!cond->IsEmittedAtUseSite()) {
2159         locations->SetOut(Location::RequiresRegister());
2160       }
2161       break;
2162     }
2163     case DataType::Type::kFloat32:
2164     case DataType::Type::kFloat64: {
2165       locations->SetInAt(0, Location::RequiresFpuRegister());
2166       if (cond->InputAt(1)->IsX86LoadFromConstantTable()) {
2167         DCHECK(cond->InputAt(1)->IsEmittedAtUseSite());
2168       } else if (cond->InputAt(1)->IsConstant()) {
2169         locations->SetInAt(1, Location::RequiresFpuRegister());
2170       } else {
2171         locations->SetInAt(1, Location::Any());
2172       }
2173       if (!cond->IsEmittedAtUseSite()) {
2174         locations->SetOut(Location::RequiresRegister());
2175       }
2176       break;
2177     }
2178     default:
2179       locations->SetInAt(0, Location::RequiresRegister());
2180       locations->SetInAt(1, Location::Any());
2181       if (!cond->IsEmittedAtUseSite()) {
2182         // We need a byte register.
2183         locations->SetOut(Location::RegisterLocation(ECX));
2184       }
2185       break;
2186   }
2187 }
2188 
HandleCondition(HCondition * cond)2189 void InstructionCodeGeneratorX86::HandleCondition(HCondition* cond) {
2190   if (cond->IsEmittedAtUseSite()) {
2191     return;
2192   }
2193 
2194   LocationSummary* locations = cond->GetLocations();
2195   Location lhs = locations->InAt(0);
2196   Location rhs = locations->InAt(1);
2197   Register reg = locations->Out().AsRegister<Register>();
2198   NearLabel true_label, false_label;
2199 
2200   switch (cond->InputAt(0)->GetType()) {
2201     default: {
2202       // Integer case.
2203 
2204       // Clear output register: setb only sets the low byte.
2205       __ xorl(reg, reg);
2206       codegen_->GenerateIntCompare(lhs, rhs);
2207       __ setb(X86Condition(cond->GetCondition()), reg);
2208       return;
2209     }
2210     case DataType::Type::kInt64:
2211       GenerateLongComparesAndJumps(cond, &true_label, &false_label);
2212       break;
2213     case DataType::Type::kFloat32:
2214       GenerateFPCompare(lhs, rhs, cond, false);
2215       GenerateFPJumps(cond, &true_label, &false_label);
2216       break;
2217     case DataType::Type::kFloat64:
2218       GenerateFPCompare(lhs, rhs, cond, true);
2219       GenerateFPJumps(cond, &true_label, &false_label);
2220       break;
2221   }
2222 
2223   // Convert the jumps into the result.
2224   NearLabel done_label;
2225 
2226   // False case: result = 0.
2227   __ Bind(&false_label);
2228   __ xorl(reg, reg);
2229   __ jmp(&done_label);
2230 
2231   // True case: result = 1.
2232   __ Bind(&true_label);
2233   __ movl(reg, Immediate(1));
2234   __ Bind(&done_label);
2235 }
2236 
VisitEqual(HEqual * comp)2237 void LocationsBuilderX86::VisitEqual(HEqual* comp) {
2238   HandleCondition(comp);
2239 }
2240 
VisitEqual(HEqual * comp)2241 void InstructionCodeGeneratorX86::VisitEqual(HEqual* comp) {
2242   HandleCondition(comp);
2243 }
2244 
VisitNotEqual(HNotEqual * comp)2245 void LocationsBuilderX86::VisitNotEqual(HNotEqual* comp) {
2246   HandleCondition(comp);
2247 }
2248 
VisitNotEqual(HNotEqual * comp)2249 void InstructionCodeGeneratorX86::VisitNotEqual(HNotEqual* comp) {
2250   HandleCondition(comp);
2251 }
2252 
VisitLessThan(HLessThan * comp)2253 void LocationsBuilderX86::VisitLessThan(HLessThan* comp) {
2254   HandleCondition(comp);
2255 }
2256 
VisitLessThan(HLessThan * comp)2257 void InstructionCodeGeneratorX86::VisitLessThan(HLessThan* comp) {
2258   HandleCondition(comp);
2259 }
2260 
VisitLessThanOrEqual(HLessThanOrEqual * comp)2261 void LocationsBuilderX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2262   HandleCondition(comp);
2263 }
2264 
VisitLessThanOrEqual(HLessThanOrEqual * comp)2265 void InstructionCodeGeneratorX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2266   HandleCondition(comp);
2267 }
2268 
VisitGreaterThan(HGreaterThan * comp)2269 void LocationsBuilderX86::VisitGreaterThan(HGreaterThan* comp) {
2270   HandleCondition(comp);
2271 }
2272 
VisitGreaterThan(HGreaterThan * comp)2273 void InstructionCodeGeneratorX86::VisitGreaterThan(HGreaterThan* comp) {
2274   HandleCondition(comp);
2275 }
2276 
VisitGreaterThanOrEqual(HGreaterThanOrEqual * comp)2277 void LocationsBuilderX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2278   HandleCondition(comp);
2279 }
2280 
VisitGreaterThanOrEqual(HGreaterThanOrEqual * comp)2281 void InstructionCodeGeneratorX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2282   HandleCondition(comp);
2283 }
2284 
VisitBelow(HBelow * comp)2285 void LocationsBuilderX86::VisitBelow(HBelow* comp) {
2286   HandleCondition(comp);
2287 }
2288 
VisitBelow(HBelow * comp)2289 void InstructionCodeGeneratorX86::VisitBelow(HBelow* comp) {
2290   HandleCondition(comp);
2291 }
2292 
VisitBelowOrEqual(HBelowOrEqual * comp)2293 void LocationsBuilderX86::VisitBelowOrEqual(HBelowOrEqual* comp) {
2294   HandleCondition(comp);
2295 }
2296 
VisitBelowOrEqual(HBelowOrEqual * comp)2297 void InstructionCodeGeneratorX86::VisitBelowOrEqual(HBelowOrEqual* comp) {
2298   HandleCondition(comp);
2299 }
2300 
VisitAbove(HAbove * comp)2301 void LocationsBuilderX86::VisitAbove(HAbove* comp) {
2302   HandleCondition(comp);
2303 }
2304 
VisitAbove(HAbove * comp)2305 void InstructionCodeGeneratorX86::VisitAbove(HAbove* comp) {
2306   HandleCondition(comp);
2307 }
2308 
VisitAboveOrEqual(HAboveOrEqual * comp)2309 void LocationsBuilderX86::VisitAboveOrEqual(HAboveOrEqual* comp) {
2310   HandleCondition(comp);
2311 }
2312 
VisitAboveOrEqual(HAboveOrEqual * comp)2313 void InstructionCodeGeneratorX86::VisitAboveOrEqual(HAboveOrEqual* comp) {
2314   HandleCondition(comp);
2315 }
2316 
VisitIntConstant(HIntConstant * constant)2317 void LocationsBuilderX86::VisitIntConstant(HIntConstant* constant) {
2318   LocationSummary* locations =
2319       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2320   locations->SetOut(Location::ConstantLocation(constant));
2321 }
2322 
VisitIntConstant(HIntConstant * constant ATTRIBUTE_UNUSED)2323 void InstructionCodeGeneratorX86::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
2324   // Will be generated at use site.
2325 }
2326 
VisitNullConstant(HNullConstant * constant)2327 void LocationsBuilderX86::VisitNullConstant(HNullConstant* constant) {
2328   LocationSummary* locations =
2329       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2330   locations->SetOut(Location::ConstantLocation(constant));
2331 }
2332 
VisitNullConstant(HNullConstant * constant ATTRIBUTE_UNUSED)2333 void InstructionCodeGeneratorX86::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
2334   // Will be generated at use site.
2335 }
2336 
VisitLongConstant(HLongConstant * constant)2337 void LocationsBuilderX86::VisitLongConstant(HLongConstant* constant) {
2338   LocationSummary* locations =
2339       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2340   locations->SetOut(Location::ConstantLocation(constant));
2341 }
2342 
VisitLongConstant(HLongConstant * constant ATTRIBUTE_UNUSED)2343 void InstructionCodeGeneratorX86::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
2344   // Will be generated at use site.
2345 }
2346 
VisitFloatConstant(HFloatConstant * constant)2347 void LocationsBuilderX86::VisitFloatConstant(HFloatConstant* constant) {
2348   LocationSummary* locations =
2349       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2350   locations->SetOut(Location::ConstantLocation(constant));
2351 }
2352 
VisitFloatConstant(HFloatConstant * constant ATTRIBUTE_UNUSED)2353 void InstructionCodeGeneratorX86::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
2354   // Will be generated at use site.
2355 }
2356 
VisitDoubleConstant(HDoubleConstant * constant)2357 void LocationsBuilderX86::VisitDoubleConstant(HDoubleConstant* constant) {
2358   LocationSummary* locations =
2359       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2360   locations->SetOut(Location::ConstantLocation(constant));
2361 }
2362 
VisitDoubleConstant(HDoubleConstant * constant ATTRIBUTE_UNUSED)2363 void InstructionCodeGeneratorX86::VisitDoubleConstant(HDoubleConstant* constant ATTRIBUTE_UNUSED) {
2364   // Will be generated at use site.
2365 }
2366 
VisitConstructorFence(HConstructorFence * constructor_fence)2367 void LocationsBuilderX86::VisitConstructorFence(HConstructorFence* constructor_fence) {
2368   constructor_fence->SetLocations(nullptr);
2369 }
2370 
VisitConstructorFence(HConstructorFence * constructor_fence ATTRIBUTE_UNUSED)2371 void InstructionCodeGeneratorX86::VisitConstructorFence(
2372     HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
2373   codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2374 }
2375 
VisitMemoryBarrier(HMemoryBarrier * memory_barrier)2376 void LocationsBuilderX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2377   memory_barrier->SetLocations(nullptr);
2378 }
2379 
VisitMemoryBarrier(HMemoryBarrier * memory_barrier)2380 void InstructionCodeGeneratorX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2381   codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
2382 }
2383 
VisitReturnVoid(HReturnVoid * ret)2384 void LocationsBuilderX86::VisitReturnVoid(HReturnVoid* ret) {
2385   ret->SetLocations(nullptr);
2386 }
2387 
VisitReturnVoid(HReturnVoid * ret ATTRIBUTE_UNUSED)2388 void InstructionCodeGeneratorX86::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) {
2389   codegen_->GenerateFrameExit();
2390 }
2391 
VisitReturn(HReturn * ret)2392 void LocationsBuilderX86::VisitReturn(HReturn* ret) {
2393   LocationSummary* locations =
2394       new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall);
2395   switch (ret->InputAt(0)->GetType()) {
2396     case DataType::Type::kReference:
2397     case DataType::Type::kBool:
2398     case DataType::Type::kUint8:
2399     case DataType::Type::kInt8:
2400     case DataType::Type::kUint16:
2401     case DataType::Type::kInt16:
2402     case DataType::Type::kInt32:
2403       locations->SetInAt(0, Location::RegisterLocation(EAX));
2404       break;
2405 
2406     case DataType::Type::kInt64:
2407       locations->SetInAt(
2408           0, Location::RegisterPairLocation(EAX, EDX));
2409       break;
2410 
2411     case DataType::Type::kFloat32:
2412     case DataType::Type::kFloat64:
2413       locations->SetInAt(
2414           0, Location::FpuRegisterLocation(XMM0));
2415       break;
2416 
2417     default:
2418       LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType();
2419   }
2420 }
2421 
VisitReturn(HReturn * ret)2422 void InstructionCodeGeneratorX86::VisitReturn(HReturn* ret) {
2423   switch (ret->InputAt(0)->GetType()) {
2424     case DataType::Type::kReference:
2425     case DataType::Type::kBool:
2426     case DataType::Type::kUint8:
2427     case DataType::Type::kInt8:
2428     case DataType::Type::kUint16:
2429     case DataType::Type::kInt16:
2430     case DataType::Type::kInt32:
2431       DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<Register>(), EAX);
2432       break;
2433 
2434     case DataType::Type::kInt64:
2435       DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegisterPairLow<Register>(), EAX);
2436       DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegisterPairHigh<Register>(), EDX);
2437       break;
2438 
2439     case DataType::Type::kFloat32:
2440       DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>(), XMM0);
2441       if (GetGraph()->IsCompilingOsr()) {
2442         // To simplify callers of an OSR method, we put the return value in both
2443         // floating point and core registers.
2444         __ movd(EAX, XMM0);
2445       }
2446       break;
2447 
2448     case DataType::Type::kFloat64:
2449       DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>(), XMM0);
2450       if (GetGraph()->IsCompilingOsr()) {
2451         // To simplify callers of an OSR method, we put the return value in both
2452         // floating point and core registers.
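        // Concretely (sketch): if the double's 64-bit pattern is H:L, the
        // movd below sets EAX = L, and shifting a copy right by 32 bits
        // before the second movd sets EDX = H, i.e. the usual EDX:EAX
        // convention for a 64-bit return value.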
2453         __ movd(EAX, XMM0);
2454         // Use XMM1 as temporary register to not clobber XMM0.
2455         __ movaps(XMM1, XMM0);
2456         __ psrlq(XMM1, Immediate(32));
2457         __ movd(EDX, XMM1);
2458       }
2459       break;
2460 
2461     default:
2462       LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType();
2463   }
2464   codegen_->GenerateFrameExit();
2465 }
2466 
2467 void LocationsBuilderX86::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2468   // The trampoline uses the same calling convention as dex calling conventions,
2469   // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
2470   // the method_idx.
2471   HandleInvoke(invoke);
2472 }
2473 
2474 void InstructionCodeGeneratorX86::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2475   codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
2476 }
2477 
2478 void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2479   // Explicit clinit checks triggered by static invokes must have been pruned by
2480   // art::PrepareForRegisterAllocation.
2481   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2482 
2483   IntrinsicLocationsBuilderX86 intrinsic(codegen_);
2484   if (intrinsic.TryDispatch(invoke)) {
2485     if (invoke->GetLocations()->CanCall() &&
2486         invoke->HasPcRelativeMethodLoadKind() &&
2487         invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).IsInvalid()) {
2488       invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any());
2489     }
2490     return;
2491   }
2492 
2493   if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
2494     CriticalNativeCallingConventionVisitorX86 calling_convention_visitor(
2495         /*for_register_allocation=*/ true);
2496     CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
2497   } else {
2498     HandleInvoke(invoke);
2499   }
2500 
2501   // For PC-relative load kinds the invoke has an extra input, the PC-relative address base.
2502   if (invoke->HasPcRelativeMethodLoadKind()) {
2503     invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
2504   }
2505 }
2506 
2507 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86* codegen) {
2508   if (invoke->GetLocations()->Intrinsified()) {
2509     IntrinsicCodeGeneratorX86 intrinsic(codegen);
2510     intrinsic.Dispatch(invoke);
2511     return true;
2512   }
2513   return false;
2514 }
2515 
2516 void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2517   // Explicit clinit checks triggered by static invokes must have been pruned by
2518   // art::PrepareForRegisterAllocation.
2519   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2520 
2521   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2522     return;
2523   }
2524 
2525   LocationSummary* locations = invoke->GetLocations();
2526   codegen_->GenerateStaticOrDirectCall(
2527       invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
2528 }
2529 
2530 void LocationsBuilderX86::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2531   IntrinsicLocationsBuilderX86 intrinsic(codegen_);
2532   if (intrinsic.TryDispatch(invoke)) {
2533     return;
2534   }
2535 
2536   HandleInvoke(invoke);
2537 
2538   if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
2539     // Add one temporary for inline cache update.
2540     invoke->GetLocations()->AddTemp(Location::RegisterLocation(EBP));
2541   }
2542 }
2543 
2544 void LocationsBuilderX86::HandleInvoke(HInvoke* invoke) {
2545   InvokeDexCallingConventionVisitorX86 calling_convention_visitor;
2546   CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
2547 }
2548 
2549 void InstructionCodeGeneratorX86::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2550   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2551     return;
2552   }
2553 
2554   codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
2555   DCHECK(!codegen_->IsLeafMethod());
2556 }
2557 
2558 void LocationsBuilderX86::VisitInvokeInterface(HInvokeInterface* invoke) {
2559   // This call to HandleInvoke allocates a temporary (core) register
2560   // which is also used to stage the hidden argument before it is
2561   // transferred into the FP register (XMM7).
2562   HandleInvoke(invoke);
2563   // Add the hidden argument.
2564   invoke->GetLocations()->AddTemp(Location::FpuRegisterLocation(XMM7));
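  // Using an XMM register (XMM7) for the hidden ArtMethod* avoids tying up
  // yet another scarce core register across the argument moves; see the
  // matching movd in the code generator below.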
2565 
2566   if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
2567     // Add one temporary for inline cache update.
2568     invoke->GetLocations()->AddTemp(Location::RegisterLocation(EBP));
2569   }
2570 
2571   // For PC-relative load kinds the invoke has an extra input, the PC-relative address base.
2572   if (IsPcRelativeMethodLoadKind(invoke->GetHiddenArgumentLoadKind())) {
2573     invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
2574   }
2575 
2576   if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
2577     invoke->GetLocations()->SetInAt(invoke->GetNumberOfArguments() - 1,
2578                                     Location::RequiresRegister());
2579   }
2580 }
2581 
2582 void CodeGeneratorX86::MaybeGenerateInlineCacheCheck(HInstruction* instruction, Register klass) {
2583   DCHECK_EQ(EAX, klass);
2584   // We know the destination of an intrinsic, so no need to record inline
2585   // caches (also the intrinsic location builder doesn't request an additional
2586   // temporary).
2587   if (!instruction->GetLocations()->Intrinsified() &&
2588       GetGraph()->IsCompilingBaseline() &&
2589       !Runtime::Current()->IsAotCompiler()) {
2590     DCHECK(!instruction->GetEnvironment()->IsFromInlinedInvoke());
2591     ScopedProfilingInfoUse spiu(
2592         Runtime::Current()->GetJit(), GetGraph()->GetArtMethod(), Thread::Current());
2593     ProfilingInfo* info = spiu.GetProfilingInfo();
2594     if (info != nullptr) {
2595       InlineCache* cache = info->GetInlineCache(instruction->GetDexPc());
2596       uint32_t address = reinterpret_cast32<uint32_t>(cache);
2597       if (kIsDebugBuild) {
2598         uint32_t temp_index = instruction->GetLocations()->GetTempCount() - 1u;
2599         CHECK_EQ(EBP, instruction->GetLocations()->GetTemp(temp_index).AsRegister<Register>());
2600       }
2601       Register temp = EBP;
2602       NearLabel done;
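      // Shape of the emitted fast path (sketch):
      //   movl ebp, imm32              // ebp = address of the InlineCache
      //   cmpl eax, [ebp + classes]    // does the first cached class match?
      //   je   done                    // monomorphic hit: nothing to update
      //   call fs:[pUpdateInlineCache] // slow path: let the runtime record it
      // done: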
2603       __ movl(temp, Immediate(address));
2604       // Fast path for a monomorphic cache.
2605       __ cmpl(klass, Address(temp, InlineCache::ClassesOffset().Int32Value()));
2606       __ j(kEqual, &done);
2607       GenerateInvokeRuntime(GetThreadOffset<kX86PointerSize>(kQuickUpdateInlineCache).Int32Value());
2608       __ Bind(&done);
2609     }
2610   }
2611 }
2612 
2613 void InstructionCodeGeneratorX86::VisitInvokeInterface(HInvokeInterface* invoke) {
2614   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
2615   LocationSummary* locations = invoke->GetLocations();
2616   Register temp = locations->GetTemp(0).AsRegister<Register>();
2617   XmmRegister hidden_reg = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
2618   Location receiver = locations->InAt(0);
2619   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2620 
2621   // Set the hidden argument. It is safe to do this here, as XMM7
2622   // won't be modified thereafter, before the `call` instruction.
2623   DCHECK_EQ(XMM7, hidden_reg);
2624   if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
2625     __ movd(hidden_reg, locations->InAt(invoke->GetNumberOfArguments() - 1).AsRegister<Register>());
2626   } else if (invoke->GetHiddenArgumentLoadKind() != MethodLoadKind::kRuntimeCall) {
2627     codegen_->LoadMethod(invoke->GetHiddenArgumentLoadKind(), locations->GetTemp(0), invoke);
2628     __ movd(hidden_reg, temp);
2629   }
2630 
2631   if (receiver.IsStackSlot()) {
2632     __ movl(temp, Address(ESP, receiver.GetStackIndex()));
2633     // /* HeapReference<Class> */ temp = temp->klass_
2634     __ movl(temp, Address(temp, class_offset));
2635   } else {
2636     // /* HeapReference<Class> */ temp = receiver->klass_
2637     __ movl(temp, Address(receiver.AsRegister<Register>(), class_offset));
2638   }
2639   codegen_->MaybeRecordImplicitNullCheck(invoke);
2640   // Instead of simply (possibly) unpoisoning `temp` here, we should
2641   // emit a read barrier for the previous class reference load.
2642   // However this is not required in practice, as this is an
2643   // intermediate/temporary reference and because the current
2644   // concurrent copying collector keeps the from-space memory
2645   // intact/accessible until the end of the marking phase (the
2646   // concurrent copying collector may not in the future).
2647   __ MaybeUnpoisonHeapReference(temp);
2648 
2649   codegen_->MaybeGenerateInlineCacheCheck(invoke, temp);
2650 
2651   // temp = temp->GetAddressOfIMT()
2652   __ movl(temp,
2653       Address(temp, mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
2654   // temp = temp->GetImtEntryAt(method_offset);
2655   uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
2656       invoke->GetImtIndex(), kX86PointerSize));
2657   __ movl(temp, Address(temp, method_offset));
2658   if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRuntimeCall) {
2659     // We pass the method from the IMT in case of a conflict. This will ensure
2660     // we go into the runtime to resolve the actual method.
2661     __ movd(hidden_reg, temp);
2662   }
2663   // call temp->GetEntryPoint();
2664   __ call(Address(temp,
2665                   ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value()));
2666 
2667   DCHECK(!codegen_->IsLeafMethod());
2668   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2669 }
2670 
2671 void LocationsBuilderX86::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2672   IntrinsicLocationsBuilderX86 intrinsic(codegen_);
2673   if (intrinsic.TryDispatch(invoke)) {
2674     return;
2675   }
2676   HandleInvoke(invoke);
2677 }
2678 
2679 void InstructionCodeGeneratorX86::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2680   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2681     return;
2682   }
2683   codegen_->GenerateInvokePolymorphicCall(invoke);
2684 }
2685 
2686 void LocationsBuilderX86::VisitInvokeCustom(HInvokeCustom* invoke) {
2687   HandleInvoke(invoke);
2688 }
2689 
2690 void InstructionCodeGeneratorX86::VisitInvokeCustom(HInvokeCustom* invoke) {
2691   codegen_->GenerateInvokeCustomCall(invoke);
2692 }
2693 
2694 void LocationsBuilderX86::VisitNeg(HNeg* neg) {
2695   LocationSummary* locations =
2696       new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
2697   switch (neg->GetResultType()) {
2698     case DataType::Type::kInt32:
2699     case DataType::Type::kInt64:
2700       locations->SetInAt(0, Location::RequiresRegister());
2701       locations->SetOut(Location::SameAsFirstInput());
2702       break;
2703 
2704     case DataType::Type::kFloat32:
2705       locations->SetInAt(0, Location::RequiresFpuRegister());
2706       locations->SetOut(Location::SameAsFirstInput());
2707       locations->AddTemp(Location::RequiresRegister());
2708       locations->AddTemp(Location::RequiresFpuRegister());
2709       break;
2710 
2711     case DataType::Type::kFloat64:
2712       locations->SetInAt(0, Location::RequiresFpuRegister());
2713       locations->SetOut(Location::SameAsFirstInput());
2714       locations->AddTemp(Location::RequiresFpuRegister());
2715       break;
2716 
2717     default:
2718       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2719   }
2720 }
2721 
2722 void InstructionCodeGeneratorX86::VisitNeg(HNeg* neg) {
2723   LocationSummary* locations = neg->GetLocations();
2724   Location out = locations->Out();
2725   Location in = locations->InAt(0);
2726   switch (neg->GetResultType()) {
2727     case DataType::Type::kInt32:
2728       DCHECK(in.IsRegister());
2729       DCHECK(in.Equals(out));
2730       __ negl(out.AsRegister<Register>());
2731       break;
2732 
2733     case DataType::Type::kInt64:
2734       DCHECK(in.IsRegisterPair());
2735       DCHECK(in.Equals(out));
2736       __ negl(out.AsRegisterPairLow<Register>());
2737       // Negation is similar to subtraction from zero. The least
2738       // significant 32 bits trigger a borrow when they are different from
2739       // zero; to take this into account, add 1 to the most significant
2740       // 32 bits if the carry flag (CF) is set to 1 after the first NEGL
2741       // operation.
2742       __ adcl(out.AsRegisterPairHigh<Register>(), Immediate(0));
2743       __ negl(out.AsRegisterPairHigh<Register>());
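      // Worked example (sketch): negating 0x00000000'00000001:
      //   negl low   -> low  = 0xFFFFFFFF, CF = 1 (low was non-zero),
      //   adcl high  -> high = 0x00000001,
      //   negl high  -> high = 0xFFFFFFFF,
      // giving 0xFFFFFFFF'FFFFFFFF, i.e. -1, as expected.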
2744       break;
2745 
2746     case DataType::Type::kFloat32: {
2747       DCHECK(in.Equals(out));
2748       Register constant = locations->GetTemp(0).AsRegister<Register>();
2749       XmmRegister mask = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
2750       // Implement float negation with an exclusive or with value
2751       // 0x80000000 (mask for bit 31, representing the sign of a
2752       // single-precision floating-point number).
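      // Unlike computing 0.0f - x, flipping only the sign bit also negates
      // +/-0.0f correctly and leaves NaN payloads untouched.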
2753       __ movl(constant, Immediate(INT32_C(0x80000000)));
2754       __ movd(mask, constant);
2755       __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
2756       break;
2757     }
2758 
2759     case DataType::Type::kFloat64: {
2760       DCHECK(in.Equals(out));
2761       XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2762       // Implement double negation with an exclusive or with value
2763       // 0x8000000000000000 (mask for bit 63, representing the sign of
2764       // a double-precision floating-point number).
2765       __ LoadLongConstant(mask, INT64_C(0x8000000000000000));
2766       __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
2767       break;
2768     }
2769 
2770     default:
2771       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2772   }
2773 }
2774 
2775 void LocationsBuilderX86::VisitX86FPNeg(HX86FPNeg* neg) {
2776   LocationSummary* locations =
2777       new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
2778   DCHECK(DataType::IsFloatingPointType(neg->GetType()));
2779   locations->SetInAt(0, Location::RequiresFpuRegister());
2780   locations->SetInAt(1, Location::RequiresRegister());
2781   locations->SetOut(Location::SameAsFirstInput());
2782   locations->AddTemp(Location::RequiresFpuRegister());
2783 }
2784 
2785 void InstructionCodeGeneratorX86::VisitX86FPNeg(HX86FPNeg* neg) {
2786   LocationSummary* locations = neg->GetLocations();
2787   Location out = locations->Out();
2788   DCHECK(locations->InAt(0).Equals(out));
2789 
2790   Register constant_area = locations->InAt(1).AsRegister<Register>();
2791   XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2792   if (neg->GetType() == DataType::Type::kFloat32) {
2793     __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x80000000),
2794                                                  neg->GetBaseMethodAddress(),
2795                                                  constant_area));
2796     __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
2797   } else {
2798      __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000),
2799                                                   neg->GetBaseMethodAddress(),
2800                                                   constant_area));
2801      __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
2802   }
2803 }
2804 
2805 void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) {
2806   DataType::Type result_type = conversion->GetResultType();
2807   DataType::Type input_type = conversion->GetInputType();
2808   DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
2809       << input_type << " -> " << result_type;
2810 
2811   // The float-to-long and double-to-long type conversions rely on a
2812   // call to the runtime.
2813   LocationSummary::CallKind call_kind =
2814       ((input_type == DataType::Type::kFloat32 || input_type == DataType::Type::kFloat64)
2815        && result_type == DataType::Type::kInt64)
2816       ? LocationSummary::kCallOnMainOnly
2817       : LocationSummary::kNoCall;
2818   LocationSummary* locations =
2819       new (GetGraph()->GetAllocator()) LocationSummary(conversion, call_kind);
2820 
2821   switch (result_type) {
2822     case DataType::Type::kUint8:
2823     case DataType::Type::kInt8:
2824       switch (input_type) {
2825         case DataType::Type::kUint8:
2826         case DataType::Type::kInt8:
2827         case DataType::Type::kUint16:
2828         case DataType::Type::kInt16:
2829         case DataType::Type::kInt32:
2830           locations->SetInAt(0, Location::ByteRegisterOrConstant(ECX, conversion->InputAt(0)));
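          // Only EAX/EBX/ECX/EDX have byte-addressable low halves on x86-32,
          // hence the ByteRegister constraint (ECX is used when the input is
          // not a constant).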
2831           // Make the output overlap to please the register allocator. This greatly simplifies
2832           // the validation of the linear scan implementation.
2833           locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
2834           break;
2835         case DataType::Type::kInt64: {
2836           HInstruction* input = conversion->InputAt(0);
2837           Location input_location = input->IsConstant()
2838               ? Location::ConstantLocation(input->AsConstant())
2839               : Location::RegisterPairLocation(EAX, EDX);
2840           locations->SetInAt(0, input_location);
2841           // Make the output overlap to please the register allocator. This greatly simplifies
2842           // the validation of the linear scan implementation.
2843           locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
2844           break;
2845         }
2846 
2847         default:
2848           LOG(FATAL) << "Unexpected type conversion from " << input_type
2849                      << " to " << result_type;
2850       }
2851       break;
2852 
2853     case DataType::Type::kUint16:
2854     case DataType::Type::kInt16:
2855       DCHECK(DataType::IsIntegralType(input_type)) << input_type;
2856       locations->SetInAt(0, Location::Any());
2857       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2858       break;
2859 
2860     case DataType::Type::kInt32:
2861       switch (input_type) {
2862         case DataType::Type::kInt64:
2863           locations->SetInAt(0, Location::Any());
2864           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2865           break;
2866 
2867         case DataType::Type::kFloat32:
2868           locations->SetInAt(0, Location::RequiresFpuRegister());
2869           locations->SetOut(Location::RequiresRegister());
2870           locations->AddTemp(Location::RequiresFpuRegister());
2871           break;
2872 
2873         case DataType::Type::kFloat64:
2874           locations->SetInAt(0, Location::RequiresFpuRegister());
2875           locations->SetOut(Location::RequiresRegister());
2876           locations->AddTemp(Location::RequiresFpuRegister());
2877           break;
2878 
2879         default:
2880           LOG(FATAL) << "Unexpected type conversion from " << input_type
2881                      << " to " << result_type;
2882       }
2883       break;
2884 
2885     case DataType::Type::kInt64:
2886       switch (input_type) {
2887         case DataType::Type::kBool:
2888         case DataType::Type::kUint8:
2889         case DataType::Type::kInt8:
2890         case DataType::Type::kUint16:
2891         case DataType::Type::kInt16:
2892         case DataType::Type::kInt32:
2893           locations->SetInAt(0, Location::RegisterLocation(EAX));
2894           locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
2895           break;
2896 
2897         case DataType::Type::kFloat32:
2898         case DataType::Type::kFloat64: {
2899           InvokeRuntimeCallingConvention calling_convention;
2900           XmmRegister parameter = calling_convention.GetFpuRegisterAt(0);
2901           locations->SetInAt(0, Location::FpuRegisterLocation(parameter));
2902 
2903           // The runtime helper puts the result in EAX, EDX.
2904           locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
2905         }
2906         break;
2907 
2908         default:
2909           LOG(FATAL) << "Unexpected type conversion from " << input_type
2910                      << " to " << result_type;
2911       }
2912       break;
2913 
2914     case DataType::Type::kFloat32:
2915       switch (input_type) {
2916         case DataType::Type::kBool:
2917         case DataType::Type::kUint8:
2918         case DataType::Type::kInt8:
2919         case DataType::Type::kUint16:
2920         case DataType::Type::kInt16:
2921         case DataType::Type::kInt32:
2922           locations->SetInAt(0, Location::RequiresRegister());
2923           locations->SetOut(Location::RequiresFpuRegister());
2924           break;
2925 
2926         case DataType::Type::kInt64:
2927           locations->SetInAt(0, Location::Any());
2928           locations->SetOut(Location::Any());
2929           break;
2930 
2931         case DataType::Type::kFloat64:
2932           locations->SetInAt(0, Location::RequiresFpuRegister());
2933           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2934           break;
2935 
2936         default:
2937           LOG(FATAL) << "Unexpected type conversion from " << input_type
2938                      << " to " << result_type;
2939       }
2940       break;
2941 
2942     case DataType::Type::kFloat64:
2943       switch (input_type) {
2944         case DataType::Type::kBool:
2945         case DataType::Type::kUint8:
2946         case DataType::Type::kInt8:
2947         case DataType::Type::kUint16:
2948         case DataType::Type::kInt16:
2949         case DataType::Type::kInt32:
2950           locations->SetInAt(0, Location::RequiresRegister());
2951           locations->SetOut(Location::RequiresFpuRegister());
2952           break;
2953 
2954         case DataType::Type::kInt64:
2955           locations->SetInAt(0, Location::Any());
2956           locations->SetOut(Location::Any());
2957           break;
2958 
2959         case DataType::Type::kFloat32:
2960           locations->SetInAt(0, Location::RequiresFpuRegister());
2961           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2962           break;
2963 
2964         default:
2965           LOG(FATAL) << "Unexpected type conversion from " << input_type
2966                      << " to " << result_type;
2967       }
2968       break;
2969 
2970     default:
2971       LOG(FATAL) << "Unexpected type conversion from " << input_type
2972                  << " to " << result_type;
2973   }
2974 }
2975 
2976 void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversion) {
2977   LocationSummary* locations = conversion->GetLocations();
2978   Location out = locations->Out();
2979   Location in = locations->InAt(0);
2980   DataType::Type result_type = conversion->GetResultType();
2981   DataType::Type input_type = conversion->GetInputType();
2982   DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
2983       << input_type << " -> " << result_type;
2984   switch (result_type) {
2985     case DataType::Type::kUint8:
2986       switch (input_type) {
2987         case DataType::Type::kInt8:
2988         case DataType::Type::kUint16:
2989         case DataType::Type::kInt16:
2990         case DataType::Type::kInt32:
2991           if (in.IsRegister()) {
2992             __ movzxb(out.AsRegister<Register>(), in.AsRegister<ByteRegister>());
2993           } else {
2994             DCHECK(in.GetConstant()->IsIntConstant());
2995             int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
2996             __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint8_t>(value)));
2997           }
2998           break;
2999         case DataType::Type::kInt64:
3000           if (in.IsRegisterPair()) {
3001             __ movzxb(out.AsRegister<Register>(), in.AsRegisterPairLow<ByteRegister>());
3002           } else {
3003             DCHECK(in.GetConstant()->IsLongConstant());
3004             int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3005             __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint8_t>(value)));
3006           }
3007           break;
3008 
3009         default:
3010           LOG(FATAL) << "Unexpected type conversion from " << input_type
3011                      << " to " << result_type;
3012       }
3013       break;
3014 
3015     case DataType::Type::kInt8:
3016       switch (input_type) {
3017         case DataType::Type::kUint8:
3018         case DataType::Type::kUint16:
3019         case DataType::Type::kInt16:
3020         case DataType::Type::kInt32:
3021           if (in.IsRegister()) {
3022             __ movsxb(out.AsRegister<Register>(), in.AsRegister<ByteRegister>());
3023           } else {
3024             DCHECK(in.GetConstant()->IsIntConstant());
3025             int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
3026             __ movl(out.AsRegister<Register>(), Immediate(static_cast<int8_t>(value)));
3027           }
3028           break;
3029         case DataType::Type::kInt64:
3030           if (in.IsRegisterPair()) {
3031             __ movsxb(out.AsRegister<Register>(), in.AsRegisterPairLow<ByteRegister>());
3032           } else {
3033             DCHECK(in.GetConstant()->IsLongConstant());
3034             int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3035             __ movl(out.AsRegister<Register>(), Immediate(static_cast<int8_t>(value)));
3036           }
3037           break;
3038 
3039         default:
3040           LOG(FATAL) << "Unexpected type conversion from " << input_type
3041                      << " to " << result_type;
3042       }
3043       break;
3044 
3045     case DataType::Type::kUint16:
3046       switch (input_type) {
3047         case DataType::Type::kInt8:
3048         case DataType::Type::kInt16:
3049         case DataType::Type::kInt32:
3050           if (in.IsRegister()) {
3051             __ movzxw(out.AsRegister<Register>(), in.AsRegister<Register>());
3052           } else if (in.IsStackSlot()) {
3053             __ movzxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
3054           } else {
3055             DCHECK(in.GetConstant()->IsIntConstant());
3056             int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
3057             __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint16_t>(value)));
3058           }
3059           break;
3060         case DataType::Type::kInt64:
3061           if (in.IsRegisterPair()) {
3062             __ movzxw(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
3063           } else if (in.IsDoubleStackSlot()) {
3064             __ movzxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
3065           } else {
3066             DCHECK(in.GetConstant()->IsLongConstant());
3067             int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3068             __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint16_t>(value)));
3069           }
3070           break;
3071 
3072         default:
3073           LOG(FATAL) << "Unexpected type conversion from " << input_type
3074                      << " to " << result_type;
3075       }
3076       break;
3077 
3078     case DataType::Type::kInt16:
3079       switch (input_type) {
3080         case DataType::Type::kUint16:
3081         case DataType::Type::kInt32:
3082           if (in.IsRegister()) {
3083             __ movsxw(out.AsRegister<Register>(), in.AsRegister<Register>());
3084           } else if (in.IsStackSlot()) {
3085             __ movsxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
3086           } else {
3087             DCHECK(in.GetConstant()->IsIntConstant());
3088             int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
3089             __ movl(out.AsRegister<Register>(), Immediate(static_cast<int16_t>(value)));
3090           }
3091           break;
3092         case DataType::Type::kInt64:
3093           if (in.IsRegisterPair()) {
3094             __ movsxw(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
3095           } else if (in.IsDoubleStackSlot()) {
3096             __ movsxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
3097           } else {
3098             DCHECK(in.GetConstant()->IsLongConstant());
3099             int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3100             __ movl(out.AsRegister<Register>(), Immediate(static_cast<int16_t>(value)));
3101           }
3102           break;
3103 
3104         default:
3105           LOG(FATAL) << "Unexpected type conversion from " << input_type
3106                      << " to " << result_type;
3107       }
3108       break;
3109 
3110     case DataType::Type::kInt32:
3111       switch (input_type) {
3112         case DataType::Type::kInt64:
3113           if (in.IsRegisterPair()) {
3114             __ movl(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
3115           } else if (in.IsDoubleStackSlot()) {
3116             __ movl(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
3117           } else {
3118             DCHECK(in.IsConstant());
3119             DCHECK(in.GetConstant()->IsLongConstant());
3120             int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3121             __ movl(out.AsRegister<Register>(), Immediate(static_cast<int32_t>(value)));
3122           }
3123           break;
3124 
3125         case DataType::Type::kFloat32: {
3126           XmmRegister input = in.AsFpuRegister<XmmRegister>();
3127           Register output = out.AsRegister<Register>();
3128           XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
3129           NearLabel done, nan;
3130 
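          // This implements Java's saturating float->int semantics (sketch):
          //   input >= (float)Integer.MAX_VALUE (incl. +inf) -> MAX_VALUE,
          //   NaN                                            -> 0,
          //   everything else -> cvttss2si truncation, which itself yields
          //   0x80000000 (== Integer.MIN_VALUE) for values below the int
          //   range, so negative overflow saturates for free.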
3131           __ movl(output, Immediate(kPrimIntMax));
3132           // temp = int-to-float(output)
3133           __ cvtsi2ss(temp, output);
3134           // if input >= temp goto done
3135           __ comiss(input, temp);
3136           __ j(kAboveEqual, &done);
3137           // if input == NaN goto nan
3138           __ j(kUnordered, &nan);
3139           // output = float-to-int-truncate(input)
3140           __ cvttss2si(output, input);
3141           __ jmp(&done);
3142           __ Bind(&nan);
3143           //  output = 0
3144           __ xorl(output, output);
3145           __ Bind(&done);
3146           break;
3147         }
3148 
3149         case DataType::Type::kFloat64: {
3150           XmmRegister input = in.AsFpuRegister<XmmRegister>();
3151           Register output = out.AsRegister<Register>();
3152           XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
3153           NearLabel done, nan;
3154 
3155           __ movl(output, Immediate(kPrimIntMax));
3156           // temp = int-to-double(output)
3157           __ cvtsi2sd(temp, output);
3158           // if input >= temp goto done
3159           __ comisd(input, temp);
3160           __ j(kAboveEqual, &done);
3161           // if input == NaN goto nan
3162           __ j(kUnordered, &nan);
3163           // output = double-to-int-truncate(input)
3164           __ cvttsd2si(output, input);
3165           __ jmp(&done);
3166           __ Bind(&nan);
3167           //  output = 0
3168           __ xorl(output, output);
3169           __ Bind(&done);
3170           break;
3171         }
3172 
3173         default:
3174           LOG(FATAL) << "Unexpected type conversion from " << input_type
3175                      << " to " << result_type;
3176       }
3177       break;
3178 
3179     case DataType::Type::kInt64:
3180       switch (input_type) {
3181         case DataType::Type::kBool:
3182         case DataType::Type::kUint8:
3183         case DataType::Type::kInt8:
3184         case DataType::Type::kUint16:
3185         case DataType::Type::kInt16:
3186         case DataType::Type::kInt32:
3187           DCHECK_EQ(out.AsRegisterPairLow<Register>(), EAX);
3188           DCHECK_EQ(out.AsRegisterPairHigh<Register>(), EDX);
3189           DCHECK_EQ(in.AsRegister<Register>(), EAX);
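          // cdq sign-extends EAX into EDX, leaving the result in EDX:EAX.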
3190           __ cdq();
3191           break;
3192 
3193         case DataType::Type::kFloat32:
3194           codegen_->InvokeRuntime(kQuickF2l, conversion, conversion->GetDexPc());
3195           CheckEntrypointTypes<kQuickF2l, int64_t, float>();
3196           break;
3197 
3198         case DataType::Type::kFloat64:
3199           codegen_->InvokeRuntime(kQuickD2l, conversion, conversion->GetDexPc());
3200           CheckEntrypointTypes<kQuickD2l, int64_t, double>();
3201           break;
3202 
3203         default:
3204           LOG(FATAL) << "Unexpected type conversion from " << input_type
3205                      << " to " << result_type;
3206       }
3207       break;
3208 
3209     case DataType::Type::kFloat32:
3210       switch (input_type) {
3211         case DataType::Type::kBool:
3212         case DataType::Type::kUint8:
3213         case DataType::Type::kInt8:
3214         case DataType::Type::kUint16:
3215         case DataType::Type::kInt16:
3216         case DataType::Type::kInt32:
3217           __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<Register>());
3218           break;
3219 
3220         case DataType::Type::kInt64: {
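          // 32-bit x86 has no SSE conversion from a 64-bit integer, so the
          // value is routed through the x87 FP stack (fild + fstp) instead.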
3221           size_t adjustment = 0;
3222 
3223           // Create stack space for the call to
3224           // InstructionCodeGeneratorX86::PushOntoFPStack and/or X86Assembler::fstps below.
3225           // TODO: enhance register allocator to ask for stack temporaries.
3226           if (!in.IsDoubleStackSlot() || !out.IsStackSlot()) {
3227             adjustment = DataType::Size(DataType::Type::kInt64);
3228             codegen_->IncreaseFrame(adjustment);
3229           }
3230 
3231           // Load the value to the FP stack, using temporaries if needed.
3232           PushOntoFPStack(in, 0, adjustment, false, true);
3233 
3234           if (out.IsStackSlot()) {
3235             __ fstps(Address(ESP, out.GetStackIndex() + adjustment));
3236           } else {
3237             __ fstps(Address(ESP, 0));
3238             Location stack_temp = Location::StackSlot(0);
3239             codegen_->Move32(out, stack_temp);
3240           }
3241 
3242           // Remove the temporary stack space we allocated.
3243           if (adjustment != 0) {
3244             codegen_->DecreaseFrame(adjustment);
3245           }
3246           break;
3247         }
3248 
3249         case DataType::Type::kFloat64:
3250           __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3251           break;
3252 
3253         default:
3254           LOG(FATAL) << "Unexpected type conversion from " << input_type
3255                      << " to " << result_type;
3256       }
3257       break;
3258 
3259     case DataType::Type::kFloat64:
3260       switch (input_type) {
3261         case DataType::Type::kBool:
3262         case DataType::Type::kUint8:
3263         case DataType::Type::kInt8:
3264         case DataType::Type::kUint16:
3265         case DataType::Type::kInt16:
3266         case DataType::Type::kInt32:
3267           __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<Register>());
3268           break;
3269 
3270         case DataType::Type::kInt64: {
3271           size_t adjustment = 0;
3272 
3273           // Create stack space for the call to
3274           // InstructionCodeGeneratorX86::PushOntoFPStack and/or X86Assembler::fstpl below.
3275           // TODO: enhance register allocator to ask for stack temporaries.
3276           if (!in.IsDoubleStackSlot() || !out.IsDoubleStackSlot()) {
3277             adjustment = DataType::Size(DataType::Type::kInt64);
3278             codegen_->IncreaseFrame(adjustment);
3279           }
3280 
3281           // Load the value to the FP stack, using temporaries if needed.
3282           PushOntoFPStack(in, 0, adjustment, false, true);
3283 
3284           if (out.IsDoubleStackSlot()) {
3285             __ fstpl(Address(ESP, out.GetStackIndex() + adjustment));
3286           } else {
3287             __ fstpl(Address(ESP, 0));
3288             Location stack_temp = Location::DoubleStackSlot(0);
3289             codegen_->Move64(out, stack_temp);
3290           }
3291 
3292           // Remove the temporary stack space we allocated.
3293           if (adjustment != 0) {
3294             codegen_->DecreaseFrame(adjustment);
3295           }
3296           break;
3297         }
3298 
3299         case DataType::Type::kFloat32:
3300           __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3301           break;
3302 
3303         default:
3304           LOG(FATAL) << "Unexpected type conversion from " << input_type
3305                      << " to " << result_type;
3306       }
3307       break;
3308 
3309     default:
3310       LOG(FATAL) << "Unexpected type conversion from " << input_type
3311                  << " to " << result_type;
3312   }
3313 }
3314 
3315 void LocationsBuilderX86::VisitAdd(HAdd* add) {
3316   LocationSummary* locations =
3317       new (GetGraph()->GetAllocator()) LocationSummary(add, LocationSummary::kNoCall);
3318   switch (add->GetResultType()) {
3319     case DataType::Type::kInt32: {
3320       locations->SetInAt(0, Location::RequiresRegister());
3321       locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
3322       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3323       break;
3324     }
3325 
3326     case DataType::Type::kInt64: {
3327       locations->SetInAt(0, Location::RequiresRegister());
3328       locations->SetInAt(1, Location::Any());
3329       locations->SetOut(Location::SameAsFirstInput());
3330       break;
3331     }
3332 
3333     case DataType::Type::kFloat32:
3334     case DataType::Type::kFloat64: {
3335       locations->SetInAt(0, Location::RequiresFpuRegister());
3336       if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
3337         DCHECK(add->InputAt(1)->IsEmittedAtUseSite());
3338       } else if (add->InputAt(1)->IsConstant()) {
3339         locations->SetInAt(1, Location::RequiresFpuRegister());
3340       } else {
3341         locations->SetInAt(1, Location::Any());
3342       }
3343       locations->SetOut(Location::SameAsFirstInput());
3344       break;
3345     }
3346 
3347     default:
3348       LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3349       UNREACHABLE();
3350   }
3351 }
3352 
3353 void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) {
3354   LocationSummary* locations = add->GetLocations();
3355   Location first = locations->InAt(0);
3356   Location second = locations->InAt(1);
3357   Location out = locations->Out();
3358 
3359   switch (add->GetResultType()) {
3360     case DataType::Type::kInt32: {
3361       if (second.IsRegister()) {
3362         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3363           __ addl(out.AsRegister<Register>(), second.AsRegister<Register>());
3364         } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3365           __ addl(out.AsRegister<Register>(), first.AsRegister<Register>());
3366         } else {
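          // Neither input aliases the output, so use LEA as a non-destructive
          // three-operand add: out = first + second (flags are not affected).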
3367           __ leal(out.AsRegister<Register>(), Address(
3368               first.AsRegister<Register>(), second.AsRegister<Register>(), TIMES_1, 0));
3369         }
3370       } else if (second.IsConstant()) {
3371         int32_t value = second.GetConstant()->AsIntConstant()->GetValue();
3372         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3373           __ addl(out.AsRegister<Register>(), Immediate(value));
3374         } else {
3375           __ leal(out.AsRegister<Register>(), Address(first.AsRegister<Register>(), value));
3376         }
3377       } else {
3378         DCHECK(first.Equals(locations->Out()));
3379         __ addl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
3380       }
3381       break;
3382     }
3383 
3384     case DataType::Type::kInt64: {
3385       if (second.IsRegisterPair()) {
3386         __ addl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
3387         __ adcl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
3388       } else if (second.IsDoubleStackSlot()) {
3389         __ addl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
3390         __ adcl(first.AsRegisterPairHigh<Register>(),
3391                 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
3392       } else {
3393         DCHECK(second.IsConstant()) << second;
3394         int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3395         __ addl(first.AsRegisterPairLow<Register>(), Immediate(Low32Bits(value)));
3396         __ adcl(first.AsRegisterPairHigh<Register>(), Immediate(High32Bits(value)));
3397       }
3398       break;
3399     }
3400 
3401     case DataType::Type::kFloat32: {
3402       if (second.IsFpuRegister()) {
3403         __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3404       } else if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
3405         HX86LoadFromConstantTable* const_area = add->InputAt(1)->AsX86LoadFromConstantTable();
3406         DCHECK(const_area->IsEmittedAtUseSite());
3407         __ addss(first.AsFpuRegister<XmmRegister>(),
3408                  codegen_->LiteralFloatAddress(
3409                      const_area->GetConstant()->AsFloatConstant()->GetValue(),
3410                      const_area->GetBaseMethodAddress(),
3411                      const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3412       } else {
3413         DCHECK(second.IsStackSlot());
3414         __ addss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3415       }
3416       break;
3417     }
3418 
3419     case DataType::Type::kFloat64: {
3420       if (second.IsFpuRegister()) {
3421         __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3422       } else if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
3423         HX86LoadFromConstantTable* const_area = add->InputAt(1)->AsX86LoadFromConstantTable();
3424         DCHECK(const_area->IsEmittedAtUseSite());
3425         __ addsd(first.AsFpuRegister<XmmRegister>(),
3426                  codegen_->LiteralDoubleAddress(
3427                      const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3428                      const_area->GetBaseMethodAddress(),
3429                      const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3430       } else {
3431         DCHECK(second.IsDoubleStackSlot());
3432         __ addsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3433       }
3434       break;
3435     }
3436 
3437     default:
3438       LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3439   }
3440 }
3441 
3442 void LocationsBuilderX86::VisitSub(HSub* sub) {
3443   LocationSummary* locations =
3444       new (GetGraph()->GetAllocator()) LocationSummary(sub, LocationSummary::kNoCall);
3445   switch (sub->GetResultType()) {
3446     case DataType::Type::kInt32:
3447     case DataType::Type::kInt64: {
3448       locations->SetInAt(0, Location::RequiresRegister());
3449       locations->SetInAt(1, Location::Any());
3450       locations->SetOut(Location::SameAsFirstInput());
3451       break;
3452     }
3453     case DataType::Type::kFloat32:
3454     case DataType::Type::kFloat64: {
3455       locations->SetInAt(0, Location::RequiresFpuRegister());
3456       if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
3457         DCHECK(sub->InputAt(1)->IsEmittedAtUseSite());
3458       } else if (sub->InputAt(1)->IsConstant()) {
3459         locations->SetInAt(1, Location::RequiresFpuRegister());
3460       } else {
3461         locations->SetInAt(1, Location::Any());
3462       }
3463       locations->SetOut(Location::SameAsFirstInput());
3464       break;
3465     }
3466 
3467     default:
3468       LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3469   }
3470 }
3471 
3472 void InstructionCodeGeneratorX86::VisitSub(HSub* sub) {
3473   LocationSummary* locations = sub->GetLocations();
3474   Location first = locations->InAt(0);
3475   Location second = locations->InAt(1);
3476   DCHECK(first.Equals(locations->Out()));
3477   switch (sub->GetResultType()) {
3478     case DataType::Type::kInt32: {
3479       if (second.IsRegister()) {
3480         __ subl(first.AsRegister<Register>(), second.AsRegister<Register>());
3481       } else if (second.IsConstant()) {
3482         __ subl(first.AsRegister<Register>(),
3483                 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
3484       } else {
3485         __ subl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
3486       }
3487       break;
3488     }
3489 
3490     case DataType::Type::kInt64: {
3491       if (second.IsRegisterPair()) {
3492         __ subl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
3493         __ sbbl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
3494       } else if (second.IsDoubleStackSlot()) {
3495         __ subl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
3496         __ sbbl(first.AsRegisterPairHigh<Register>(),
3497                 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
3498       } else {
3499         DCHECK(second.IsConstant()) << second;
3500         int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3501         __ subl(first.AsRegisterPairLow<Register>(), Immediate(Low32Bits(value)));
3502         __ sbbl(first.AsRegisterPairHigh<Register>(), Immediate(High32Bits(value)));
3503       }
3504       break;
3505     }
3506 
3507     case DataType::Type::kFloat32: {
3508       if (second.IsFpuRegister()) {
3509         __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3510       } else if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
3511         HX86LoadFromConstantTable* const_area = sub->InputAt(1)->AsX86LoadFromConstantTable();
3512         DCHECK(const_area->IsEmittedAtUseSite());
3513         __ subss(first.AsFpuRegister<XmmRegister>(),
3514                  codegen_->LiteralFloatAddress(
3515                      const_area->GetConstant()->AsFloatConstant()->GetValue(),
3516                      const_area->GetBaseMethodAddress(),
3517                      const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3518       } else {
3519         DCHECK(second.IsStackSlot());
3520         __ subss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3521       }
3522       break;
3523     }
3524 
3525     case DataType::Type::kFloat64: {
3526       if (second.IsFpuRegister()) {
3527         __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3528       } else if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
3529         HX86LoadFromConstantTable* const_area = sub->InputAt(1)->AsX86LoadFromConstantTable();
3530         DCHECK(const_area->IsEmittedAtUseSite());
3531         __ subsd(first.AsFpuRegister<XmmRegister>(),
3532                  codegen_->LiteralDoubleAddress(
3533                      const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3534                      const_area->GetBaseMethodAddress(),
3535                      const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3536       } else {
3537         DCHECK(second.IsDoubleStackSlot());
3538         __ subsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3539       }
3540       break;
3541     }
3542 
3543     default:
3544       LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3545   }
3546 }
3547 
3548 void LocationsBuilderX86::VisitMul(HMul* mul) {
3549   LocationSummary* locations =
3550       new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
3551   switch (mul->GetResultType()) {
3552     case DataType::Type::kInt32:
3553       locations->SetInAt(0, Location::RequiresRegister());
3554       locations->SetInAt(1, Location::Any());
3555       if (mul->InputAt(1)->IsIntConstant()) {
3556         // Can use 3 operand multiply.
3557         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3558       } else {
3559         locations->SetOut(Location::SameAsFirstInput());
3560       }
3561       break;
3562     case DataType::Type::kInt64: {
3563       locations->SetInAt(0, Location::RequiresRegister());
3564       locations->SetInAt(1, Location::Any());
3565       locations->SetOut(Location::SameAsFirstInput());
3566       // Needed for the 32-bit multiplies below: mull produces its 64-bit result in EDX:EAX.
3567       locations->AddTemp(Location::RegisterLocation(EAX));
3568       locations->AddTemp(Location::RegisterLocation(EDX));
3569       break;
3570     }
3571     case DataType::Type::kFloat32:
3572     case DataType::Type::kFloat64: {
3573       locations->SetInAt(0, Location::RequiresFpuRegister());
3574       if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
3575         DCHECK(mul->InputAt(1)->IsEmittedAtUseSite());
3576       } else if (mul->InputAt(1)->IsConstant()) {
3577         locations->SetInAt(1, Location::RequiresFpuRegister());
3578       } else {
3579         locations->SetInAt(1, Location::Any());
3580       }
3581       locations->SetOut(Location::SameAsFirstInput());
3582       break;
3583     }
3584 
3585     default:
3586       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3587   }
3588 }
3589 
3590 void InstructionCodeGeneratorX86::VisitMul(HMul* mul) {
3591   LocationSummary* locations = mul->GetLocations();
3592   Location first = locations->InAt(0);
3593   Location second = locations->InAt(1);
3594   Location out = locations->Out();
3595 
3596   switch (mul->GetResultType()) {
3597     case DataType::Type::kInt32:
3598       // The constant may have ended up in a register, so test explicitly to avoid
3599       // problems where the output may not be the same as the first operand.
3600       if (mul->InputAt(1)->IsIntConstant()) {
3601         Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
3602         __ imull(out.AsRegister<Register>(), first.AsRegister<Register>(), imm);
3603       } else if (second.IsRegister()) {
3604         DCHECK(first.Equals(out));
3605         __ imull(first.AsRegister<Register>(), second.AsRegister<Register>());
3606       } else {
3607         DCHECK(second.IsStackSlot());
3608         DCHECK(first.Equals(out));
3609         __ imull(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
3610       }
3611       break;
3612 
3613     case DataType::Type::kInt64: {
3614       Register in1_hi = first.AsRegisterPairHigh<Register>();
3615       Register in1_lo = first.AsRegisterPairLow<Register>();
3616       Register eax = locations->GetTemp(0).AsRegister<Register>();
3617       Register edx = locations->GetTemp(1).AsRegister<Register>();
3618 
3619       DCHECK_EQ(EAX, eax);
3620       DCHECK_EQ(EDX, edx);
3621 
3622       // input: in1 - 64 bits, in2 - 64 bits.
3623       // output: in1
3624       // formula: in1.hi : in1.lo = (in1.lo * in2.hi + in1.hi * in2.lo)* 2^32 + in1.lo * in2.lo
3625       // parts: in1.hi = in1.lo * in2.hi + in1.hi * in2.lo + (in1.lo * in2.lo)[63:32]
3626       // parts: in1.lo = (in1.lo * in2.lo)[31:0]
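      // Rationale (sketch): in1 * in2 = in1.hi*in2.hi * 2^64
      //     + (in1.lo*in2.hi + in1.hi*in2.lo) * 2^32 + in1.lo*in2.lo;
      // the in1.hi*in2.hi term only affects bits >= 64 and is dropped when
      // truncating the product to 64 bits.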
3627       if (second.IsConstant()) {
3628         DCHECK(second.GetConstant()->IsLongConstant());
3629 
3630         int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3631         int32_t low_value = Low32Bits(value);
3632         int32_t high_value = High32Bits(value);
3633         Immediate low(low_value);
3634         Immediate high(high_value);
3635 
3636         __ movl(eax, high);
3637         // eax <- in1.lo * in2.hi
3638         __ imull(eax, in1_lo);
3639         // in1.hi <- in1.hi * in2.lo
3640         __ imull(in1_hi, low);
3641         // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
3642         __ addl(in1_hi, eax);
3643         // move in2_lo to eax to prepare for double precision
3644         __ movl(eax, low);
3645         // edx:eax <- in1.lo * in2.lo
3646         __ mull(in1_lo);
3647         // in1.hi <- in2.hi * in1.lo +  in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
3648         __ addl(in1_hi, edx);
3649         // in1.lo <- (in1.lo * in2.lo)[31:0];
3650         __ movl(in1_lo, eax);
3651       } else if (second.IsRegisterPair()) {
3652         Register in2_hi = second.AsRegisterPairHigh<Register>();
3653         Register in2_lo = second.AsRegisterPairLow<Register>();
3654 
3655         __ movl(eax, in2_hi);
3656         // eax <- in1.lo * in2.hi
3657         __ imull(eax, in1_lo);
3658         // in1.hi <- in1.hi * in2.lo
3659         __ imull(in1_hi, in2_lo);
3660         // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
3661         __ addl(in1_hi, eax);
3662         // move in1_lo to eax to prepare for double precision
3663         __ movl(eax, in1_lo);
3664         // edx:eax <- in1.lo * in2.lo
3665         __ mull(in2_lo);
3666         // in1.hi <- in2.hi * in1.lo +  in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
3667         __ addl(in1_hi, edx);
3668         // in1.lo <- (in1.lo * in2.lo)[31:0];
3669         __ movl(in1_lo, eax);
3670       } else {
3671         DCHECK(second.IsDoubleStackSlot()) << second;
3672         Address in2_hi(ESP, second.GetHighStackIndex(kX86WordSize));
3673         Address in2_lo(ESP, second.GetStackIndex());
3674 
3675         __ movl(eax, in2_hi);
3676         // eax <- in1.lo * in2.hi
3677         __ imull(eax, in1_lo);
3678         // in1.hi <- in1.hi * in2.lo
3679         __ imull(in1_hi, in2_lo);
3680         // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
3681         __ addl(in1_hi, eax);
3682         // move in1_lo to eax to prepare for double precision
3683         __ movl(eax, in1_lo);
3684         // edx:eax <- in1.lo * in2.lo
3685         __ mull(in2_lo);
3686         // in1.hi <- in2.hi * in1.lo +  in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
3687         __ addl(in1_hi, edx);
3688         // in1.lo <- (in1.lo * in2.lo)[31:0];
3689         __ movl(in1_lo, eax);
3690       }
3691 
3692       break;
3693     }
3694 
3695     case DataType::Type::kFloat32: {
3696       DCHECK(first.Equals(locations->Out()));
3697       if (second.IsFpuRegister()) {
3698         __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3699       } else if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
3700         HX86LoadFromConstantTable* const_area = mul->InputAt(1)->AsX86LoadFromConstantTable();
3701         DCHECK(const_area->IsEmittedAtUseSite());
3702         __ mulss(first.AsFpuRegister<XmmRegister>(),
3703                  codegen_->LiteralFloatAddress(
3704                      const_area->GetConstant()->AsFloatConstant()->GetValue(),
3705                      const_area->GetBaseMethodAddress(),
3706                      const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3707       } else {
3708         DCHECK(second.IsStackSlot());
3709         __ mulss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3710       }
3711       break;
3712     }
3713 
3714     case DataType::Type::kFloat64: {
3715       DCHECK(first.Equals(locations->Out()));
3716       if (second.IsFpuRegister()) {
3717         __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3718       } else if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
3719         HX86LoadFromConstantTable* const_area = mul->InputAt(1)->AsX86LoadFromConstantTable();
3720         DCHECK(const_area->IsEmittedAtUseSite());
3721         __ mulsd(first.AsFpuRegister<XmmRegister>(),
3722                  codegen_->LiteralDoubleAddress(
3723                      const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3724                      const_area->GetBaseMethodAddress(),
3725                      const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3726       } else {
3727         DCHECK(second.IsDoubleStackSlot());
3728         __ mulsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3729       }
3730       break;
3731     }
3732 
3733     default:
3734       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3735   }
3736 }
3737 
PushOntoFPStack(Location source,uint32_t temp_offset,uint32_t stack_adjustment,bool is_fp,bool is_wide)3738 void InstructionCodeGeneratorX86::PushOntoFPStack(Location source,
3739                                                   uint32_t temp_offset,
3740                                                   uint32_t stack_adjustment,
3741                                                   bool is_fp,
3742                                                   bool is_wide) {
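  // Note: the x87 loads used here (flds/filds/fldl/fildl) only accept memory operands, so a
  // source that is not already on the stack is first spilled to the temporary stack slot at
  // temp_offset (the final else branch below).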
3743   if (source.IsStackSlot()) {
3744     DCHECK(!is_wide);
3745     if (is_fp) {
3746       __ flds(Address(ESP, source.GetStackIndex() + stack_adjustment));
3747     } else {
3748       __ filds(Address(ESP, source.GetStackIndex() + stack_adjustment));
3749     }
3750   } else if (source.IsDoubleStackSlot()) {
3751     DCHECK(is_wide);
3752     if (is_fp) {
3753       __ fldl(Address(ESP, source.GetStackIndex() + stack_adjustment));
3754     } else {
3755       __ fildl(Address(ESP, source.GetStackIndex() + stack_adjustment));
3756     }
3757   } else {
3758     // Write the value to the temporary location on the stack and load to FP stack.
3759     if (!is_wide) {
3760       Location stack_temp = Location::StackSlot(temp_offset);
3761       codegen_->Move32(stack_temp, source);
3762       if (is_fp) {
3763         __ flds(Address(ESP, temp_offset));
3764       } else {
3765         __ filds(Address(ESP, temp_offset));
3766       }
3767     } else {
3768       Location stack_temp = Location::DoubleStackSlot(temp_offset);
3769       codegen_->Move64(stack_temp, source);
3770       if (is_fp) {
3771         __ fldl(Address(ESP, temp_offset));
3772       } else {
3773         __ fildl(Address(ESP, temp_offset));
3774       }
3775     }
3776   }
3777 }
3778 
GenerateRemFP(HRem * rem)3779 void InstructionCodeGeneratorX86::GenerateRemFP(HRem *rem) {
3780   DataType::Type type = rem->GetResultType();
3781   bool is_float = type == DataType::Type::kFloat32;
3782   size_t elem_size = DataType::Size(type);
3783   LocationSummary* locations = rem->GetLocations();
3784   Location first = locations->InAt(0);
3785   Location second = locations->InAt(1);
3786   Location out = locations->Out();
3787 
3788   // Create stack space for 2 elements.
3789   // TODO: enhance register allocator to ask for stack temporaries.
3790   codegen_->IncreaseFrame(2 * elem_size);
3791 
3792   // Load the values to the FP stack in reverse order, using temporaries if needed.
3793   const bool is_wide = !is_float;
3794   PushOntoFPStack(second, elem_size, 2 * elem_size, /* is_fp= */ true, is_wide);
3795   PushOntoFPStack(first, 0, 2 * elem_size, /* is_fp= */ true, is_wide);
3796 
3797   // Loop doing FPREM until we stabilize.
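  // (fprem only computes a partial remainder: each iteration reduces the exponent difference
  // by at most 63 bits, so operands that are far apart in magnitude may need several passes
  // before the C2 flag is cleared.)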
3798   NearLabel retry;
3799   __ Bind(&retry);
3800   __ fprem();
3801 
3802   // Move FP status to AX.
3803   __ fstsw();
3804 
3805   // And see if the argument reduction is complete. This is signaled by the
3806   // C2 FPU flag bit set to 0.
3807   __ andl(EAX, Immediate(kC2ConditionMask));
3808   __ j(kNotEqual, &retry);
3809 
3810   // We have settled on the final value. Retrieve it into an XMM register.
3811   // Store FP top of stack to real stack.
3812   if (is_float) {
3813     __ fsts(Address(ESP, 0));
3814   } else {
3815     __ fstl(Address(ESP, 0));
3816   }
3817 
3818   // Pop the 2 items from the FP stack.
3819   __ fucompp();
3820 
3821   // Load the value from the stack into an XMM register.
3822   DCHECK(out.IsFpuRegister()) << out;
3823   if (is_float) {
3824     __ movss(out.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
3825   } else {
3826     __ movsd(out.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
3827   }
3828 
3829   // And remove the temporary stack space we allocated.
3830   codegen_->DecreaseFrame(2 * elem_size);
3831 }
3832 
3833 
DivRemOneOrMinusOne(HBinaryOperation * instruction)3834 void InstructionCodeGeneratorX86::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
3835   DCHECK(instruction->IsDiv() || instruction->IsRem());
3836 
3837   LocationSummary* locations = instruction->GetLocations();
3838   DCHECK(locations->InAt(1).IsConstant());
3839   DCHECK(locations->InAt(1).GetConstant()->IsIntConstant());
3840 
3841   Register out_register = locations->Out().AsRegister<Register>();
3842   Register input_register = locations->InAt(0).AsRegister<Register>();
3843   int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
3844 
3845   DCHECK(imm == 1 || imm == -1);
3846 
3847   if (instruction->IsRem()) {
3848     __ xorl(out_register, out_register);
3849   } else {
3850     __ movl(out_register, input_register);
3851     if (imm == -1) {
3852       __ negl(out_register);
3853     }
3854   }
3855 }
3856 
RemByPowerOfTwo(HRem * instruction)3857 void InstructionCodeGeneratorX86::RemByPowerOfTwo(HRem* instruction) {
3858   LocationSummary* locations = instruction->GetLocations();
3859   Location second = locations->InAt(1);
3860 
3861   Register out = locations->Out().AsRegister<Register>();
3862   Register numerator = locations->InAt(0).AsRegister<Register>();
3863 
3864   int32_t imm = Int64FromConstant(second.GetConstant());
3865   DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
3866   uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
3867 
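  // The remainder of a division by a power of two is numerator & (abs_imm - 1), except that
  // Java's % keeps the sign of the numerator. If the numerator is negative and the masked
  // value is non-zero, fold it back by subtracting abs_imm (the leal below adds
  // ~(abs_imm - 1), i.e. -abs_imm). E.g. imm = 8: -13 & 7 = 3, and 3 - 8 = -5 == -13 % 8.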
3868   Register tmp = locations->GetTemp(0).AsRegister<Register>();
3869   NearLabel done;
3870   __ movl(out, numerator);
3871   __ andl(out, Immediate(abs_imm-1));
3872   __ j(Condition::kZero, &done);
3873   __ leal(tmp, Address(out, static_cast<int32_t>(~(abs_imm-1))));
3874   __ testl(numerator, numerator);
3875   __ cmovl(Condition::kLess, out, tmp);
3876   __ Bind(&done);
3877 }
3878 
DivByPowerOfTwo(HDiv * instruction)3879 void InstructionCodeGeneratorX86::DivByPowerOfTwo(HDiv* instruction) {
3880   LocationSummary* locations = instruction->GetLocations();
3881 
3882   Register out_register = locations->Out().AsRegister<Register>();
3883   Register input_register = locations->InAt(0).AsRegister<Register>();
3884   int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
3885   DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
3886   uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
3887 
3888   Register num = locations->GetTemp(0).AsRegister<Register>();
3889 
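  // An arithmetic shift rounds toward negative infinity, but Java division truncates toward
  // zero. Biasing negative numerators by (abs_imm - 1) before the shift fixes this up:
  // e.g. imm = 4, n = -7: plain -7 >> 2 == -2, but (-7 + 3) >> 2 == -1 == -7 / 4.
  // The cmov below keeps the unbiased value for non-negative numerators.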
3890   __ leal(num, Address(input_register, abs_imm - 1));
3891   __ testl(input_register, input_register);
3892   __ cmovl(kGreaterEqual, num, input_register);
3893   int shift = CTZ(imm);
3894   __ sarl(num, Immediate(shift));
3895 
3896   if (imm < 0) {
3897     __ negl(num);
3898   }
3899 
3900   __ movl(out_register, num);
3901 }
3902 
GenerateDivRemWithAnyConstant(HBinaryOperation * instruction)3903 void InstructionCodeGeneratorX86::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
3904   DCHECK(instruction->IsDiv() || instruction->IsRem());
3905 
3906   LocationSummary* locations = instruction->GetLocations();
3907   int imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
3908 
3909   Register eax = locations->InAt(0).AsRegister<Register>();
3910   Register out = locations->Out().AsRegister<Register>();
3911   Register num;
3912   Register edx;
3913 
3914   if (instruction->IsDiv()) {
3915     edx = locations->GetTemp(0).AsRegister<Register>();
3916     num = locations->GetTemp(1).AsRegister<Register>();
3917   } else {
3918     edx = locations->Out().AsRegister<Register>();
3919     num = locations->GetTemp(0).AsRegister<Register>();
3920   }
3921 
3922   DCHECK_EQ(EAX, eax);
3923   DCHECK_EQ(EDX, edx);
3924   if (instruction->IsDiv()) {
3925     DCHECK_EQ(EAX, out);
3926   } else {
3927     DCHECK_EQ(EDX, out);
3928   }
3929 
3930   int64_t magic;
3931   int shift;
3932   CalculateMagicAndShiftForDivRem(imm, /* is_long= */ false, &magic, &shift);
3933 
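  // This is the classic multiply-by-reciprocal ("magic number") division: magic approximates
  // 2^(32 + shift) / imm, so the high 32 bits of num * magic, shifted right by 'shift', give
  // the quotient. The conditional add/sub of num below corrects for the magic constant being
  // used as a signed 32-bit value, and the final shrl/addl pair adds one when the quotient is
  // negative so that the result truncates toward zero.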
3934   // Save the numerator.
3935   __ movl(num, eax);
3936 
3937   // EAX = magic
3938   __ movl(eax, Immediate(magic));
3939 
3940   // EDX:EAX = magic * numerator
3941   __ imull(num);
3942 
3943   if (imm > 0 && magic < 0) {
3944     // EDX += num
3945     __ addl(edx, num);
3946   } else if (imm < 0 && magic > 0) {
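    // EDX -= num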
3947     __ subl(edx, num);
3948   }
3949 
3950   // Shift if needed.
3951   if (shift != 0) {
3952     __ sarl(edx, Immediate(shift));
3953   }
3954 
3955   // EDX += 1 if EDX < 0
3956   __ movl(eax, edx);
3957   __ shrl(edx, Immediate(31));
3958   __ addl(edx, eax);
3959 
3960   if (instruction->IsRem()) {
3961     __ movl(eax, num);
3962     __ imull(edx, Immediate(imm));
3963     __ subl(eax, edx);
3964     __ movl(edx, eax);
3965   } else {
3966     __ movl(eax, edx);
3967   }
3968 }
3969 
GenerateDivRemIntegral(HBinaryOperation * instruction)3970 void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instruction) {
3971   DCHECK(instruction->IsDiv() || instruction->IsRem());
3972 
3973   LocationSummary* locations = instruction->GetLocations();
3974   Location out = locations->Out();
3975   Location first = locations->InAt(0);
3976   Location second = locations->InAt(1);
3977   bool is_div = instruction->IsDiv();
3978 
3979   switch (instruction->GetResultType()) {
3980     case DataType::Type::kInt32: {
3981       DCHECK_EQ(EAX, first.AsRegister<Register>());
3982       DCHECK_EQ(is_div ? EAX : EDX, out.AsRegister<Register>());
3983 
3984       if (second.IsConstant()) {
3985         int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
3986 
3987         if (imm == 0) {
3988           // Do not generate anything for 0; the DivZeroCheck before us makes this unreachable.
3989         } else if (imm == 1 || imm == -1) {
3990           DivRemOneOrMinusOne(instruction);
3991         } else if (IsPowerOfTwo(AbsOrMin(imm))) {
3992           if (is_div) {
3993             DivByPowerOfTwo(instruction->AsDiv());
3994           } else {
3995             RemByPowerOfTwo(instruction->AsRem());
3996           }
3997         } else {
3998           DCHECK(imm <= -2 || imm >= 2);
3999           GenerateDivRemWithAnyConstant(instruction);
4000         }
4001       } else {
4002         SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) DivRemMinusOneSlowPathX86(
4003             instruction, out.AsRegister<Register>(), is_div);
4004         codegen_->AddSlowPath(slow_path);
4005 
4006         Register second_reg = second.AsRegister<Register>();
4007         // 0x80000000/-1 triggers an arithmetic exception!
4008         // Dividing by -1 is actually negation, and -0x80000000 == 0x80000000 (it wraps), so
4009         // it's safe to just use negl instead of more complex comparisons.
4010 
4011         __ cmpl(second_reg, Immediate(-1));
4012         __ j(kEqual, slow_path->GetEntryLabel());
4013 
4014         // edx:eax <- sign-extended of eax
4015         __ cdq();
4016         // eax = quotient, edx = remainder
4017         __ idivl(second_reg);
4018         __ Bind(slow_path->GetExitLabel());
4019       }
4020       break;
4021     }
4022 
4023     case DataType::Type::kInt64: {
4024       InvokeRuntimeCallingConvention calling_convention;
4025       DCHECK_EQ(calling_convention.GetRegisterAt(0), first.AsRegisterPairLow<Register>());
4026       DCHECK_EQ(calling_convention.GetRegisterAt(1), first.AsRegisterPairHigh<Register>());
4027       DCHECK_EQ(calling_convention.GetRegisterAt(2), second.AsRegisterPairLow<Register>());
4028       DCHECK_EQ(calling_convention.GetRegisterAt(3), second.AsRegisterPairHigh<Register>());
4029       DCHECK_EQ(EAX, out.AsRegisterPairLow<Register>());
4030       DCHECK_EQ(EDX, out.AsRegisterPairHigh<Register>());
4031 
4032       if (is_div) {
4033         codegen_->InvokeRuntime(kQuickLdiv, instruction, instruction->GetDexPc());
4034         CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>();
4035       } else {
4036         codegen_->InvokeRuntime(kQuickLmod, instruction, instruction->GetDexPc());
4037         CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>();
4038       }
4039       break;
4040     }
4041 
4042     default:
4043       LOG(FATAL) << "Unexpected type for GenerateDivRemIntegral " << instruction->GetResultType();
4044   }
4045 }
4046 
VisitDiv(HDiv * div)4047 void LocationsBuilderX86::VisitDiv(HDiv* div) {
4048   LocationSummary::CallKind call_kind = (div->GetResultType() == DataType::Type::kInt64)
4049       ? LocationSummary::kCallOnMainOnly
4050       : LocationSummary::kNoCall;
4051   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(div, call_kind);
4052 
4053   switch (div->GetResultType()) {
4054     case DataType::Type::kInt32: {
4055       locations->SetInAt(0, Location::RegisterLocation(EAX));
4056       locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
4057       locations->SetOut(Location::SameAsFirstInput());
4058       // Intel uses edx:eax as the dividend.
4059       locations->AddTemp(Location::RegisterLocation(EDX));
4060       // We need to save the numerator while we tweak EAX and EDX. The one-operand imull
4061       // used on the constant-divisor path forces its result into EDX:EAX, so things are
4062       // simpler if we also use EAX as the output and request another temp.
4063       if (div->InputAt(1)->IsIntConstant()) {
4064         locations->AddTemp(Location::RequiresRegister());
4065       }
4066       break;
4067     }
4068     case DataType::Type::kInt64: {
4069       InvokeRuntimeCallingConvention calling_convention;
4070       locations->SetInAt(0, Location::RegisterPairLocation(
4071           calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
4072       locations->SetInAt(1, Location::RegisterPairLocation(
4073           calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
4074       // Runtime helper puts the result in EAX, EDX.
4075       locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
4076       break;
4077     }
4078     case DataType::Type::kFloat32:
4079     case DataType::Type::kFloat64: {
4080       locations->SetInAt(0, Location::RequiresFpuRegister());
4081       if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
4082         DCHECK(div->InputAt(1)->IsEmittedAtUseSite());
4083       } else if (div->InputAt(1)->IsConstant()) {
4084         locations->SetInAt(1, Location::RequiresFpuRegister());
4085       } else {
4086         locations->SetInAt(1, Location::Any());
4087       }
4088       locations->SetOut(Location::SameAsFirstInput());
4089       break;
4090     }
4091 
4092     default:
4093       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4094   }
4095 }
4096 
VisitDiv(HDiv * div)4097 void InstructionCodeGeneratorX86::VisitDiv(HDiv* div) {
4098   LocationSummary* locations = div->GetLocations();
4099   Location first = locations->InAt(0);
4100   Location second = locations->InAt(1);
4101 
4102   switch (div->GetResultType()) {
4103     case DataType::Type::kInt32:
4104     case DataType::Type::kInt64: {
4105       GenerateDivRemIntegral(div);
4106       break;
4107     }
4108 
4109     case DataType::Type::kFloat32: {
4110       if (second.IsFpuRegister()) {
4111         __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4112       } else if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
4113         HX86LoadFromConstantTable* const_area = div->InputAt(1)->AsX86LoadFromConstantTable();
4114         DCHECK(const_area->IsEmittedAtUseSite());
4115         __ divss(first.AsFpuRegister<XmmRegister>(),
4116                  codegen_->LiteralFloatAddress(
4117                    const_area->GetConstant()->AsFloatConstant()->GetValue(),
4118                    const_area->GetBaseMethodAddress(),
4119                    const_area->GetLocations()->InAt(0).AsRegister<Register>()));
4120       } else {
4121         DCHECK(second.IsStackSlot());
4122         __ divss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
4123       }
4124       break;
4125     }
4126 
4127     case DataType::Type::kFloat64: {
4128       if (second.IsFpuRegister()) {
4129         __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4130       } else if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
4131         HX86LoadFromConstantTable* const_area = div->InputAt(1)->AsX86LoadFromConstantTable();
4132         DCHECK(const_area->IsEmittedAtUseSite());
4133         __ divsd(first.AsFpuRegister<XmmRegister>(),
4134                  codegen_->LiteralDoubleAddress(
4135                      const_area->GetConstant()->AsDoubleConstant()->GetValue(),
4136                      const_area->GetBaseMethodAddress(),
4137                      const_area->GetLocations()->InAt(0).AsRegister<Register>()));
4138       } else {
4139         DCHECK(second.IsDoubleStackSlot());
4140         __ divsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
4141       }
4142       break;
4143     }
4144 
4145     default:
4146       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4147   }
4148 }
4149 
VisitRem(HRem * rem)4150 void LocationsBuilderX86::VisitRem(HRem* rem) {
4151   DataType::Type type = rem->GetResultType();
4152 
4153   LocationSummary::CallKind call_kind = (rem->GetResultType() == DataType::Type::kInt64)
4154       ? LocationSummary::kCallOnMainOnly
4155       : LocationSummary::kNoCall;
4156   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(rem, call_kind);
4157 
4158   switch (type) {
4159     case DataType::Type::kInt32: {
4160       locations->SetInAt(0, Location::RegisterLocation(EAX));
4161       locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
4162       locations->SetOut(Location::RegisterLocation(EDX));
4163       // We need to save the numerator while we tweak EAX and EDX. The one-operand imull
4164       // used on the constant-divisor path forces its result into EDX:EAX, so things are
4165       // simpler if we also use EDX as the output and request another temp.
4166       if (rem->InputAt(1)->IsIntConstant()) {
4167         locations->AddTemp(Location::RequiresRegister());
4168       }
4169       break;
4170     }
4171     case DataType::Type::kInt64: {
4172       InvokeRuntimeCallingConvention calling_convention;
4173       locations->SetInAt(0, Location::RegisterPairLocation(
4174           calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
4175       locations->SetInAt(1, Location::RegisterPairLocation(
4176           calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
4177       // Runtime helper puts the result in EAX, EDX.
4178       locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
4179       break;
4180     }
4181     case DataType::Type::kFloat64:
4182     case DataType::Type::kFloat32: {
4183       locations->SetInAt(0, Location::Any());
4184       locations->SetInAt(1, Location::Any());
4185       locations->SetOut(Location::RequiresFpuRegister());
4186       locations->AddTemp(Location::RegisterLocation(EAX));
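      // GenerateRemFP reads the x87 status word with fstsw, which writes AX, hence the
      // fixed EAX temp.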
4187       break;
4188     }
4189 
4190     default:
4191       LOG(FATAL) << "Unexpected rem type " << type;
4192   }
4193 }
4194 
VisitRem(HRem * rem)4195 void InstructionCodeGeneratorX86::VisitRem(HRem* rem) {
4196   DataType::Type type = rem->GetResultType();
4197   switch (type) {
4198     case DataType::Type::kInt32:
4199     case DataType::Type::kInt64: {
4200       GenerateDivRemIntegral(rem);
4201       break;
4202     }
4203     case DataType::Type::kFloat32:
4204     case DataType::Type::kFloat64: {
4205       GenerateRemFP(rem);
4206       break;
4207     }
4208     default:
4209       LOG(FATAL) << "Unexpected rem type " << type;
4210   }
4211 }
4212 
CreateMinMaxLocations(ArenaAllocator * allocator,HBinaryOperation * minmax)4213 static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
4214   LocationSummary* locations = new (allocator) LocationSummary(minmax);
4215   switch (minmax->GetResultType()) {
4216     case DataType::Type::kInt32:
4217       locations->SetInAt(0, Location::RequiresRegister());
4218       locations->SetInAt(1, Location::RequiresRegister());
4219       locations->SetOut(Location::SameAsFirstInput());
4220       break;
4221     case DataType::Type::kInt64:
4222       locations->SetInAt(0, Location::RequiresRegister());
4223       locations->SetInAt(1, Location::RequiresRegister());
4224       locations->SetOut(Location::SameAsFirstInput());
4225       // Temp register used for the 64-bit subtract that sets the condition codes.
4226       locations->AddTemp(Location::RequiresRegister());
4227       break;
4228     case DataType::Type::kFloat32:
4229       locations->SetInAt(0, Location::RequiresFpuRegister());
4230       locations->SetInAt(1, Location::RequiresFpuRegister());
4231       locations->SetOut(Location::SameAsFirstInput());
4232       locations->AddTemp(Location::RequiresRegister());
4233       break;
4234     case DataType::Type::kFloat64:
4235       locations->SetInAt(0, Location::RequiresFpuRegister());
4236       locations->SetInAt(1, Location::RequiresFpuRegister());
4237       locations->SetOut(Location::SameAsFirstInput());
4238       break;
4239     default:
4240       LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
4241   }
4242 }
4243 
GenerateMinMaxInt(LocationSummary * locations,bool is_min,DataType::Type type)4244 void InstructionCodeGeneratorX86::GenerateMinMaxInt(LocationSummary* locations,
4245                                                     bool is_min,
4246                                                     DataType::Type type) {
4247   Location op1_loc = locations->InAt(0);
4248   Location op2_loc = locations->InAt(1);
4249 
4250   // Shortcut for same input locations.
4251   if (op1_loc.Equals(op2_loc)) {
4252     // Can return immediately, as op1_loc == out_loc.
4253     // Note: if we ever support separate registers, e.g., output into memory, we need to check for
4254     //       a copy here.
4255     DCHECK(locations->Out().Equals(op1_loc));
4256     return;
4257   }
4258 
4259   if (type == DataType::Type::kInt64) {
4260     // Need to perform a subtract to get the sign right.
4261     // op1 is already in the same location as the output.
4262     Location output = locations->Out();
4263     Register output_lo = output.AsRegisterPairLow<Register>();
4264     Register output_hi = output.AsRegisterPairHigh<Register>();
4265 
4266     Register op2_lo = op2_loc.AsRegisterPairLow<Register>();
4267     Register op2_hi = op2_loc.AsRegisterPairHigh<Register>();
4268 
4269     // The comparison is performed by subtracting the second operand from
4270     // the first operand and then setting the status flags in the same
4271     // manner as the SUB instruction.
4272     __ cmpl(output_lo, op2_lo);
4273 
4274     // Now use a temp and the borrow to finish the subtraction of op2_hi.
4275     Register temp = locations->GetTemp(0).AsRegister<Register>();
4276     __ movl(temp, output_hi);
4277     __ sbbl(temp, op2_hi);
4278 
4279     // Now the condition code is correct.
4280     Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess;
4281     __ cmovl(cond, output_lo, op2_lo);
4282     __ cmovl(cond, output_hi, op2_hi);
4283   } else {
4284     DCHECK_EQ(type, DataType::Type::kInt32);
4285     Register out = locations->Out().AsRegister<Register>();
4286     Register op2 = op2_loc.AsRegister<Register>();
4287 
4288     //  (out := op1)
4289     //  out <=? op2
4290     //  if out is min jmp done
4291     //  out := op2
4292     // done:
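    // (The conditional jump in this sketch is implemented with a cmov below, so no branch is
    // actually emitted.)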
4293 
4294     __ cmpl(out, op2);
4295     Condition cond = is_min ? Condition::kGreater : Condition::kLess;
4296     __ cmovl(cond, out, op2);
4297   }
4298 }
4299 
GenerateMinMaxFP(LocationSummary * locations,bool is_min,DataType::Type type)4300 void InstructionCodeGeneratorX86::GenerateMinMaxFP(LocationSummary* locations,
4301                                                    bool is_min,
4302                                                    DataType::Type type) {
4303   Location op1_loc = locations->InAt(0);
4304   Location op2_loc = locations->InAt(1);
4305   Location out_loc = locations->Out();
4306   XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
4307 
4308   // Shortcut for same input locations.
4309   if (op1_loc.Equals(op2_loc)) {
4310     DCHECK(out_loc.Equals(op1_loc));
4311     return;
4312   }
4313 
4314   //  (out := op1)
4315   //  out <=? op2
4316   //  if Nan jmp Nan_label
4317   //  if out is min jmp done
4318   //  if op2 is min jmp op2_label
4319   //  handle -0/+0
4320   //  jmp done
4321   // Nan_label:
4322   //  out := NaN
4323   // op2_label:
4324   //  out := op2
4325   // done:
4326   //
4327   // This removes one jmp, but needs to copy one input (op1) to out.
4328   //
4329   // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?
4330 
4331   XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
4332 
4333   NearLabel nan, done, op2_label;
4334   if (type == DataType::Type::kFloat64) {
4335     __ ucomisd(out, op2);
4336   } else {
4337     DCHECK_EQ(type, DataType::Type::kFloat32);
4338     __ ucomiss(out, op2);
4339   }
4340 
4341   __ j(Condition::kParityEven, &nan);
4342 
4343   __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
4344   __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
4345 
4346   // Handle 0.0/-0.0.
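  // +0.0 and -0.0 compare equal, so the right zero has to be picked by bit operations:
  // OR-ing the operands keeps a set sign bit (min(+0.0, -0.0) == -0.0), while AND-ing
  // clears it (max(+0.0, -0.0) == +0.0).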
4347   if (is_min) {
4348     if (type == DataType::Type::kFloat64) {
4349       __ orpd(out, op2);
4350     } else {
4351       __ orps(out, op2);
4352     }
4353   } else {
4354     if (type == DataType::Type::kFloat64) {
4355       __ andpd(out, op2);
4356     } else {
4357       __ andps(out, op2);
4358     }
4359   }
4360   __ jmp(&done);
4361 
4362   // NaN handling.
4363   __ Bind(&nan);
4364   if (type == DataType::Type::kFloat64) {
4365     // TODO: Use a constant from the constant table (requires extra input).
4366     __ LoadLongConstant(out, kDoubleNaN);
4367   } else {
4368     Register constant = locations->GetTemp(0).AsRegister<Register>();
4369     __ movl(constant, Immediate(kFloatNaN));
4370     __ movd(out, constant);
4371   }
4372   __ jmp(&done);
4373 
4374   // out := op2;
4375   __ Bind(&op2_label);
4376   if (type == DataType::Type::kFloat64) {
4377     __ movsd(out, op2);
4378   } else {
4379     __ movss(out, op2);
4380   }
4381 
4382   // Done.
4383   __ Bind(&done);
4384 }
4385 
GenerateMinMax(HBinaryOperation * minmax,bool is_min)4386 void InstructionCodeGeneratorX86::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
4387   DataType::Type type = minmax->GetResultType();
4388   switch (type) {
4389     case DataType::Type::kInt32:
4390     case DataType::Type::kInt64:
4391       GenerateMinMaxInt(minmax->GetLocations(), is_min, type);
4392       break;
4393     case DataType::Type::kFloat32:
4394     case DataType::Type::kFloat64:
4395       GenerateMinMaxFP(minmax->GetLocations(), is_min, type);
4396       break;
4397     default:
4398       LOG(FATAL) << "Unexpected type for HMinMax " << type;
4399   }
4400 }
4401 
VisitMin(HMin * min)4402 void LocationsBuilderX86::VisitMin(HMin* min) {
4403   CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
4404 }
4405 
VisitMin(HMin * min)4406 void InstructionCodeGeneratorX86::VisitMin(HMin* min) {
4407   GenerateMinMax(min, /*is_min*/ true);
4408 }
4409 
VisitMax(HMax * max)4410 void LocationsBuilderX86::VisitMax(HMax* max) {
4411   CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
4412 }
4413 
VisitMax(HMax * max)4414 void InstructionCodeGeneratorX86::VisitMax(HMax* max) {
4415   GenerateMinMax(max, /*is_min*/ false);
4416 }
4417 
VisitAbs(HAbs * abs)4418 void LocationsBuilderX86::VisitAbs(HAbs* abs) {
4419   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
4420   switch (abs->GetResultType()) {
4421     case DataType::Type::kInt32:
4422       locations->SetInAt(0, Location::RegisterLocation(EAX));
4423       locations->SetOut(Location::SameAsFirstInput());
4424       locations->AddTemp(Location::RegisterLocation(EDX));
4425       break;
4426     case DataType::Type::kInt64:
4427       locations->SetInAt(0, Location::RequiresRegister());
4428       locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
4429       locations->AddTemp(Location::RequiresRegister());
4430       break;
4431     case DataType::Type::kFloat32:
4432       locations->SetInAt(0, Location::RequiresFpuRegister());
4433       locations->SetOut(Location::SameAsFirstInput());
4434       locations->AddTemp(Location::RequiresFpuRegister());
4435       locations->AddTemp(Location::RequiresRegister());
4436       break;
4437     case DataType::Type::kFloat64:
4438       locations->SetInAt(0, Location::RequiresFpuRegister());
4439       locations->SetOut(Location::SameAsFirstInput());
4440       locations->AddTemp(Location::RequiresFpuRegister());
4441       break;
4442     default:
4443       LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4444   }
4445 }
4446 
VisitAbs(HAbs * abs)4447 void InstructionCodeGeneratorX86::VisitAbs(HAbs* abs) {
4448   LocationSummary* locations = abs->GetLocations();
4449   switch (abs->GetResultType()) {
4450     case DataType::Type::kInt32: {
4451       Register out = locations->Out().AsRegister<Register>();
4452       DCHECK_EQ(out, EAX);
4453       Register temp = locations->GetTemp(0).AsRegister<Register>();
4454       DCHECK_EQ(temp, EDX);
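      // abs(x) == (x ^ sign) - sign, with sign = x >> 31 (all ones for negative x, zero
      // otherwise). E.g. x = -5: sign = -1, (-5 ^ -1) == 4, 4 - (-1) == 5.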
4455       // Sign extend EAX into EDX.
4456       __ cdq();
4457       // XOR EAX with sign.
4458       __ xorl(EAX, EDX);
4459       // Subtract out sign to correct.
4460       __ subl(EAX, EDX);
4461       // The result is in EAX.
4462       break;
4463     }
4464     case DataType::Type::kInt64: {
4465       Location input = locations->InAt(0);
4466       Register input_lo = input.AsRegisterPairLow<Register>();
4467       Register input_hi = input.AsRegisterPairHigh<Register>();
4468       Location output = locations->Out();
4469       Register output_lo = output.AsRegisterPairLow<Register>();
4470       Register output_hi = output.AsRegisterPairHigh<Register>();
4471       Register temp = locations->GetTemp(0).AsRegister<Register>();
4472       // Compute the sign into the temporary.
4473       __ movl(temp, input_hi);
4474       __ sarl(temp, Immediate(31));
4475       // Store the sign into the output.
4476       __ movl(output_lo, temp);
4477       __ movl(output_hi, temp);
4478       // XOR the input to the output.
4479       __ xorl(output_lo, input_lo);
4480       __ xorl(output_hi, input_hi);
4481       // Subtract the sign.
4482       __ subl(output_lo, temp);
4483       __ sbbl(output_hi, temp);
4484       break;
4485     }
4486     case DataType::Type::kFloat32: {
4487       XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4488       XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4489       Register constant = locations->GetTemp(1).AsRegister<Register>();
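      // Floats are sign-magnitude, so abs() just clears the sign bit by AND-ing with
      // 0x7FFFFFFF.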
4490       __ movl(constant, Immediate(INT32_C(0x7FFFFFFF)));
4491       __ movd(temp, constant);
4492       __ andps(out, temp);
4493       break;
4494     }
4495     case DataType::Type::kFloat64: {
4496       XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4497       XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4498       // TODO: Use a constant from the constant table (requires extra input).
4499       __ LoadLongConstant(temp, INT64_C(0x7FFFFFFFFFFFFFFF));
4500       __ andpd(out, temp);
4501       break;
4502     }
4503     default:
4504       LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4505   }
4506 }
4507 
VisitDivZeroCheck(HDivZeroCheck * instruction)4508 void LocationsBuilderX86::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4509   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
4510   switch (instruction->GetType()) {
4511     case DataType::Type::kBool:
4512     case DataType::Type::kUint8:
4513     case DataType::Type::kInt8:
4514     case DataType::Type::kUint16:
4515     case DataType::Type::kInt16:
4516     case DataType::Type::kInt32: {
4517       locations->SetInAt(0, Location::Any());
4518       break;
4519     }
4520     case DataType::Type::kInt64: {
4521       locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
4522       if (!instruction->IsConstant()) {
4523         locations->AddTemp(Location::RequiresRegister());
4524       }
4525       break;
4526     }
4527     default:
4528       LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
4529   }
4530 }
4531 
VisitDivZeroCheck(HDivZeroCheck * instruction)4532 void InstructionCodeGeneratorX86::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4533   SlowPathCode* slow_path =
4534       new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathX86(instruction);
4535   codegen_->AddSlowPath(slow_path);
4536 
4537   LocationSummary* locations = instruction->GetLocations();
4538   Location value = locations->InAt(0);
4539 
4540   switch (instruction->GetType()) {
4541     case DataType::Type::kBool:
4542     case DataType::Type::kUint8:
4543     case DataType::Type::kInt8:
4544     case DataType::Type::kUint16:
4545     case DataType::Type::kInt16:
4546     case DataType::Type::kInt32: {
4547       if (value.IsRegister()) {
4548         __ testl(value.AsRegister<Register>(), value.AsRegister<Register>());
4549         __ j(kEqual, slow_path->GetEntryLabel());
4550       } else if (value.IsStackSlot()) {
4551         __ cmpl(Address(ESP, value.GetStackIndex()), Immediate(0));
4552         __ j(kEqual, slow_path->GetEntryLabel());
4553       } else {
4554         DCHECK(value.IsConstant()) << value;
4555         if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
4556           __ jmp(slow_path->GetEntryLabel());
4557         }
4558       }
4559       break;
4560     }
4561     case DataType::Type::kInt64: {
4562       if (value.IsRegisterPair()) {
4563         Register temp = locations->GetTemp(0).AsRegister<Register>();
4564         __ movl(temp, value.AsRegisterPairLow<Register>());
4565         __ orl(temp, value.AsRegisterPairHigh<Register>());
4566         __ j(kEqual, slow_path->GetEntryLabel());
4567       } else {
4568         DCHECK(value.IsConstant()) << value;
4569         if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
4570           __ jmp(slow_path->GetEntryLabel());
4571         }
4572       }
4573       break;
4574     }
4575     default:
4576       LOG(FATAL) << "Unexpected type for HDivZeroCheck" << instruction->GetType();
4577   }
4578 }
4579 
HandleShift(HBinaryOperation * op)4580 void LocationsBuilderX86::HandleShift(HBinaryOperation* op) {
4581   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4582 
4583   LocationSummary* locations =
4584       new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall);
4585 
4586   switch (op->GetResultType()) {
4587     case DataType::Type::kInt32:
4588     case DataType::Type::kInt64: {
4589       // Can't have Location::Any() and output SameAsFirstInput()
4590       locations->SetInAt(0, Location::RequiresRegister());
4591       // The shift count needs to be in CL or a constant.
4592       locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1)));
4593       locations->SetOut(Location::SameAsFirstInput());
4594       break;
4595     }
4596     default:
4597       LOG(FATAL) << "Unexpected op type " << op->GetResultType();
4598   }
4599 }
4600 
HandleShift(HBinaryOperation * op)4601 void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) {
4602   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4603 
4604   LocationSummary* locations = op->GetLocations();
4605   Location first = locations->InAt(0);
4606   Location second = locations->InAt(1);
4607   DCHECK(first.Equals(locations->Out()));
4608 
4609   switch (op->GetResultType()) {
4610     case DataType::Type::kInt32: {
4611       DCHECK(first.IsRegister());
4612       Register first_reg = first.AsRegister<Register>();
4613       if (second.IsRegister()) {
4614         Register second_reg = second.AsRegister<Register>();
4615         DCHECK_EQ(ECX, second_reg);
4616         if (op->IsShl()) {
4617           __ shll(first_reg, second_reg);
4618         } else if (op->IsShr()) {
4619           __ sarl(first_reg, second_reg);
4620         } else {
4621           __ shrl(first_reg, second_reg);
4622         }
4623       } else {
4624         int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance;
4625         if (shift == 0) {
4626           return;
4627         }
4628         Immediate imm(shift);
4629         if (op->IsShl()) {
4630           __ shll(first_reg, imm);
4631         } else if (op->IsShr()) {
4632           __ sarl(first_reg, imm);
4633         } else {
4634           __ shrl(first_reg, imm);
4635         }
4636       }
4637       break;
4638     }
4639     case DataType::Type::kInt64: {
4640       if (second.IsRegister()) {
4641         Register second_reg = second.AsRegister<Register>();
4642         DCHECK_EQ(ECX, second_reg);
4643         if (op->IsShl()) {
4644           GenerateShlLong(first, second_reg);
4645         } else if (op->IsShr()) {
4646           GenerateShrLong(first, second_reg);
4647         } else {
4648           GenerateUShrLong(first, second_reg);
4649         }
4650       } else {
4651         // Shift by a constant.
4652         int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance;
4653         // Nothing to do if the shift is 0, as the input is already the output.
4654         if (shift != 0) {
4655           if (op->IsShl()) {
4656             GenerateShlLong(first, shift);
4657           } else if (op->IsShr()) {
4658             GenerateShrLong(first, shift);
4659           } else {
4660             GenerateUShrLong(first, shift);
4661           }
4662         }
4663       }
4664       break;
4665     }
4666     default:
4667       LOG(FATAL) << "Unexpected op type " << op->GetResultType();
4668   }
4669 }
4670 
GenerateShlLong(const Location & loc,int shift)4671 void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, int shift) {
4672   Register low = loc.AsRegisterPairLow<Register>();
4673   Register high = loc.AsRegisterPairHigh<Register>();
4674   if (shift == 1) {
4675     // This is just an addition.
4676     __ addl(low, low);
4677     __ adcl(high, high);
4678   } else if (shift == 32) {
4679     // Shift by 32 is easy. High gets low, and low gets 0.
4680     codegen_->EmitParallelMoves(
4681         loc.ToLow(),
4682         loc.ToHigh(),
4683         DataType::Type::kInt32,
4684         Location::ConstantLocation(GetGraph()->GetIntConstant(0)),
4685         loc.ToLow(),
4686         DataType::Type::kInt32);
4687   } else if (shift > 32) {
4688     // Low part becomes 0.  High part is low part << (shift-32).
4689     __ movl(high, low);
4690     __ shll(high, Immediate(shift - 32));
4691     __ xorl(low, low);
4692   } else {
4693     // Between 1 and 31.
4694     __ shld(high, low, Immediate(shift));
4695     __ shll(low, Immediate(shift));
4696   }
4697 }
4698 
GenerateShlLong(const Location & loc,Register shifter)4699 void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register shifter) {
4700   NearLabel done;
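  // 32-bit shifts (including shld) use the count modulo 32, so a shift amount in [32, 63]
  // leaves the pair shifted by only (amount - 32). The test against bit 5 of the shifter
  // below detects that case and finishes the job by moving the low word into the high word
  // and zeroing the low word.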
4701   __ shld(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>(), shifter);
4702   __ shll(loc.AsRegisterPairLow<Register>(), shifter);
4703   __ testl(shifter, Immediate(32));
4704   __ j(kEqual, &done);
4705   __ movl(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>());
4706   __ movl(loc.AsRegisterPairLow<Register>(), Immediate(0));
4707   __ Bind(&done);
4708 }
4709 
GenerateShrLong(const Location & loc,int shift)4710 void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, int shift) {
4711   Register low = loc.AsRegisterPairLow<Register>();
4712   Register high = loc.AsRegisterPairHigh<Register>();
4713   if (shift == 32) {
4714     // Need to copy the sign.
4715     DCHECK_NE(low, high);
4716     __ movl(low, high);
4717     __ sarl(high, Immediate(31));
4718   } else if (shift > 32) {
4719     DCHECK_NE(low, high);
4720     // High part becomes sign. Low part is shifted by shift - 32.
4721     __ movl(low, high);
4722     __ sarl(high, Immediate(31));
4723     __ sarl(low, Immediate(shift - 32));
4724   } else {
4725     // Between 1 and 31.
4726     __ shrd(low, high, Immediate(shift));
4727     __ sarl(high, Immediate(shift));
4728   }
4729 }
4730 
GenerateShrLong(const Location & loc,Register shifter)4731 void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register shifter) {
4732   NearLabel done;
4733   __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
4734   __ sarl(loc.AsRegisterPairHigh<Register>(), shifter);
4735   __ testl(shifter, Immediate(32));
4736   __ j(kEqual, &done);
4737   __ movl(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>());
4738   __ sarl(loc.AsRegisterPairHigh<Register>(), Immediate(31));
4739   __ Bind(&done);
4740 }
4741 
GenerateUShrLong(const Location & loc,int shift)4742 void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, int shift) {
4743   Register low = loc.AsRegisterPairLow<Register>();
4744   Register high = loc.AsRegisterPairHigh<Register>();
4745   if (shift == 32) {
4746     // Shift by 32 is easy. Low gets high, and high gets 0.
4747     codegen_->EmitParallelMoves(
4748         loc.ToHigh(),
4749         loc.ToLow(),
4750         DataType::Type::kInt32,
4751         Location::ConstantLocation(GetGraph()->GetIntConstant(0)),
4752         loc.ToHigh(),
4753         DataType::Type::kInt32);
4754   } else if (shift > 32) {
4755     // Low part is high >> (shift - 32). High part becomes 0.
4756     __ movl(low, high);
4757     __ shrl(low, Immediate(shift - 32));
4758     __ xorl(high, high);
4759   } else {
4760     // Between 1 and 31.
4761     __ shrd(low, high, Immediate(shift));
4762     __ shrl(high, Immediate(shift));
4763   }
4764 }
4765 
GenerateUShrLong(const Location & loc,Register shifter)4766 void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, Register shifter) {
4767   NearLabel done;
4768   __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
4769   __ shrl(loc.AsRegisterPairHigh<Register>(), shifter);
4770   __ testl(shifter, Immediate(32));
4771   __ j(kEqual, &done);
4772   __ movl(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>());
4773   __ movl(loc.AsRegisterPairHigh<Register>(), Immediate(0));
4774   __ Bind(&done);
4775 }
4776 
VisitRor(HRor * ror)4777 void LocationsBuilderX86::VisitRor(HRor* ror) {
4778   LocationSummary* locations =
4779       new (GetGraph()->GetAllocator()) LocationSummary(ror, LocationSummary::kNoCall);
4780 
4781   switch (ror->GetResultType()) {
4782     case DataType::Type::kInt64:
4783       // Add the temporary needed.
4784       locations->AddTemp(Location::RequiresRegister());
4785       FALLTHROUGH_INTENDED;
4786     case DataType::Type::kInt32:
4787       locations->SetInAt(0, Location::RequiresRegister());
4788       // The shift count needs to be in CL (unless it is a constant).
4789       locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, ror->InputAt(1)));
4790       locations->SetOut(Location::SameAsFirstInput());
4791       break;
4792     default:
4793       LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
4794       UNREACHABLE();
4795   }
4796 }
4797 
VisitRor(HRor * ror)4798 void InstructionCodeGeneratorX86::VisitRor(HRor* ror) {
4799   LocationSummary* locations = ror->GetLocations();
4800   Location first = locations->InAt(0);
4801   Location second = locations->InAt(1);
4802 
4803   if (ror->GetResultType() == DataType::Type::kInt32) {
4804     Register first_reg = first.AsRegister<Register>();
4805     if (second.IsRegister()) {
4806       Register second_reg = second.AsRegister<Register>();
4807       __ rorl(first_reg, second_reg);
4808     } else {
4809       Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
4810       __ rorl(first_reg, imm);
4811     }
4812     return;
4813   }
4814 
4815   DCHECK_EQ(ror->GetResultType(), DataType::Type::kInt64);
4816   Register first_reg_lo = first.AsRegisterPairLow<Register>();
4817   Register first_reg_hi = first.AsRegisterPairHigh<Register>();
4818   Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
4819   if (second.IsRegister()) {
4820     Register second_reg = second.AsRegister<Register>();
4821     DCHECK_EQ(second_reg, ECX);
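    // Two shrd's rotate the 64-bit pair right by (ECX mod 32); if bit 5 of ECX is set
    // (rotation amount >= 32), the halves are then swapped by the cmov pair below.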
4822     __ movl(temp_reg, first_reg_hi);
4823     __ shrd(first_reg_hi, first_reg_lo, second_reg);
4824     __ shrd(first_reg_lo, temp_reg, second_reg);
4825     __ movl(temp_reg, first_reg_hi);
4826     __ testl(second_reg, Immediate(32));
4827     __ cmovl(kNotEqual, first_reg_hi, first_reg_lo);
4828     __ cmovl(kNotEqual, first_reg_lo, temp_reg);
4829   } else {
4830     int32_t shift_amt = second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance;
4831     if (shift_amt == 0) {
4832       // Already fine.
4833       return;
4834     }
4835     if (shift_amt == 32) {
4836       // Just swap.
4837       __ movl(temp_reg, first_reg_lo);
4838       __ movl(first_reg_lo, first_reg_hi);
4839       __ movl(first_reg_hi, temp_reg);
4840       return;
4841     }
4842 
4843     Immediate imm(shift_amt);
4844     // Save the contents of the low value.
4845     __ movl(temp_reg, first_reg_lo);
4846 
4847     // Shift right into low, feeding bits from high.
4848     __ shrd(first_reg_lo, first_reg_hi, imm);
4849 
4850     // Shift right into high, feeding bits from the original low.
4851     __ shrd(first_reg_hi, temp_reg, imm);
4852 
4853     // Swap if needed.
4854     if (shift_amt > 32) {
4855       __ movl(temp_reg, first_reg_lo);
4856       __ movl(first_reg_lo, first_reg_hi);
4857       __ movl(first_reg_hi, temp_reg);
4858     }
4859   }
4860 }
4861 
VisitShl(HShl * shl)4862 void LocationsBuilderX86::VisitShl(HShl* shl) {
4863   HandleShift(shl);
4864 }
4865 
VisitShl(HShl * shl)4866 void InstructionCodeGeneratorX86::VisitShl(HShl* shl) {
4867   HandleShift(shl);
4868 }
4869 
VisitShr(HShr * shr)4870 void LocationsBuilderX86::VisitShr(HShr* shr) {
4871   HandleShift(shr);
4872 }
4873 
VisitShr(HShr * shr)4874 void InstructionCodeGeneratorX86::VisitShr(HShr* shr) {
4875   HandleShift(shr);
4876 }
4877 
VisitUShr(HUShr * ushr)4878 void LocationsBuilderX86::VisitUShr(HUShr* ushr) {
4879   HandleShift(ushr);
4880 }
4881 
VisitUShr(HUShr * ushr)4882 void InstructionCodeGeneratorX86::VisitUShr(HUShr* ushr) {
4883   HandleShift(ushr);
4884 }
4885 
VisitNewInstance(HNewInstance * instruction)4886 void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) {
4887   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
4888       instruction, LocationSummary::kCallOnMainOnly);
4889   locations->SetOut(Location::RegisterLocation(EAX));
4890   InvokeRuntimeCallingConvention calling_convention;
4891   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
4892 }
4893 
VisitNewInstance(HNewInstance * instruction)4894 void InstructionCodeGeneratorX86::VisitNewInstance(HNewInstance* instruction) {
4895   codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
4896   CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
4897   DCHECK(!codegen_->IsLeafMethod());
4898 }
4899 
VisitNewArray(HNewArray * instruction)4900 void LocationsBuilderX86::VisitNewArray(HNewArray* instruction) {
4901   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
4902       instruction, LocationSummary::kCallOnMainOnly);
4903   locations->SetOut(Location::RegisterLocation(EAX));
4904   InvokeRuntimeCallingConvention calling_convention;
4905   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
4906   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
4907 }
4908 
VisitNewArray(HNewArray * instruction)4909 void InstructionCodeGeneratorX86::VisitNewArray(HNewArray* instruction) {
4910   // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
4911   QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
4912   codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
4913   CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
4914   DCHECK(!codegen_->IsLeafMethod());
4915 }
4916 
VisitParameterValue(HParameterValue * instruction)4917 void LocationsBuilderX86::VisitParameterValue(HParameterValue* instruction) {
4918   LocationSummary* locations =
4919       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4920   Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
4921   if (location.IsStackSlot()) {
4922     location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
4923   } else if (location.IsDoubleStackSlot()) {
4924     location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
4925   }
4926   locations->SetOut(location);
4927 }
4928 
VisitParameterValue(HParameterValue * instruction ATTRIBUTE_UNUSED)4929 void InstructionCodeGeneratorX86::VisitParameterValue(
4930     HParameterValue* instruction ATTRIBUTE_UNUSED) {
4931 }
4932 
VisitCurrentMethod(HCurrentMethod * instruction)4933 void LocationsBuilderX86::VisitCurrentMethod(HCurrentMethod* instruction) {
4934   LocationSummary* locations =
4935       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4936   locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument));
4937 }
4938 
VisitCurrentMethod(HCurrentMethod * instruction ATTRIBUTE_UNUSED)4939 void InstructionCodeGeneratorX86::VisitCurrentMethod(HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
4940 }
4941 
VisitClassTableGet(HClassTableGet * instruction)4942 void LocationsBuilderX86::VisitClassTableGet(HClassTableGet* instruction) {
4943   LocationSummary* locations =
4944       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4945   locations->SetInAt(0, Location::RequiresRegister());
4946   locations->SetOut(Location::RequiresRegister());
4947 }
4948 
VisitClassTableGet(HClassTableGet * instruction)4949 void InstructionCodeGeneratorX86::VisitClassTableGet(HClassTableGet* instruction) {
4950   LocationSummary* locations = instruction->GetLocations();
4951   if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
4952     uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
4953         instruction->GetIndex(), kX86PointerSize).SizeValue();
4954     __ movl(locations->Out().AsRegister<Register>(),
4955             Address(locations->InAt(0).AsRegister<Register>(), method_offset));
4956   } else {
4957     uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
4958         instruction->GetIndex(), kX86PointerSize));
4959     __ movl(locations->Out().AsRegister<Register>(),
4960             Address(locations->InAt(0).AsRegister<Register>(),
4961                     mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
4962     // out = out->GetImtEntryAt(method_offset);
4963     __ movl(locations->Out().AsRegister<Register>(),
4964             Address(locations->Out().AsRegister<Register>(), method_offset));
4965   }
4966 }
4967 
VisitNot(HNot * not_)4968 void LocationsBuilderX86::VisitNot(HNot* not_) {
4969   LocationSummary* locations =
4970       new (GetGraph()->GetAllocator()) LocationSummary(not_, LocationSummary::kNoCall);
4971   locations->SetInAt(0, Location::RequiresRegister());
4972   locations->SetOut(Location::SameAsFirstInput());
4973 }
4974 
VisitNot(HNot * not_)4975 void InstructionCodeGeneratorX86::VisitNot(HNot* not_) {
4976   LocationSummary* locations = not_->GetLocations();
4977   Location in = locations->InAt(0);
4978   Location out = locations->Out();
4979   DCHECK(in.Equals(out));
4980   switch (not_->GetResultType()) {
4981     case DataType::Type::kInt32:
4982       __ notl(out.AsRegister<Register>());
4983       break;
4984 
4985     case DataType::Type::kInt64:
4986       __ notl(out.AsRegisterPairLow<Register>());
4987       __ notl(out.AsRegisterPairHigh<Register>());
4988       break;
4989 
4990     default:
4991       LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
4992   }
4993 }
4994 
VisitBooleanNot(HBooleanNot * bool_not)4995 void LocationsBuilderX86::VisitBooleanNot(HBooleanNot* bool_not) {
4996   LocationSummary* locations =
4997       new (GetGraph()->GetAllocator()) LocationSummary(bool_not, LocationSummary::kNoCall);
4998   locations->SetInAt(0, Location::RequiresRegister());
4999   locations->SetOut(Location::SameAsFirstInput());
5000 }
5001 
VisitBooleanNot(HBooleanNot * bool_not)5002 void InstructionCodeGeneratorX86::VisitBooleanNot(HBooleanNot* bool_not) {
5003   LocationSummary* locations = bool_not->GetLocations();
5004   Location in = locations->InAt(0);
5005   Location out = locations->Out();
5006   DCHECK(in.Equals(out));
5007   __ xorl(out.AsRegister<Register>(), Immediate(1));
5008 }
5009 
VisitCompare(HCompare * compare)5010 void LocationsBuilderX86::VisitCompare(HCompare* compare) {
5011   LocationSummary* locations =
5012       new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
5013   switch (compare->InputAt(0)->GetType()) {
5014     case DataType::Type::kBool:
5015     case DataType::Type::kUint8:
5016     case DataType::Type::kInt8:
5017     case DataType::Type::kUint16:
5018     case DataType::Type::kInt16:
5019     case DataType::Type::kInt32:
5020     case DataType::Type::kInt64: {
5021       locations->SetInAt(0, Location::RequiresRegister());
5022       locations->SetInAt(1, Location::Any());
5023       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5024       break;
5025     }
5026     case DataType::Type::kFloat32:
5027     case DataType::Type::kFloat64: {
5028       locations->SetInAt(0, Location::RequiresFpuRegister());
5029       if (compare->InputAt(1)->IsX86LoadFromConstantTable()) {
5030         DCHECK(compare->InputAt(1)->IsEmittedAtUseSite());
5031       } else if (compare->InputAt(1)->IsConstant()) {
5032         locations->SetInAt(1, Location::RequiresFpuRegister());
5033       } else {
5034         locations->SetInAt(1, Location::Any());
5035       }
5036       locations->SetOut(Location::RequiresRegister());
5037       break;
5038     }
5039     default:
5040       LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
5041   }
5042 }
5043 
VisitCompare(HCompare * compare)5044 void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) {
5045   LocationSummary* locations = compare->GetLocations();
5046   Register out = locations->Out().AsRegister<Register>();
5047   Location left = locations->InAt(0);
5048   Location right = locations->InAt(1);
5049 
5050   NearLabel less, greater, done;
5051   Condition less_cond = kLess;
5052 
5053   switch (compare->InputAt(0)->GetType()) {
5054     case DataType::Type::kBool:
5055     case DataType::Type::kUint8:
5056     case DataType::Type::kInt8:
5057     case DataType::Type::kUint16:
5058     case DataType::Type::kInt16:
5059     case DataType::Type::kInt32: {
5060       codegen_->GenerateIntCompare(left, right);
5061       break;
5062     }
5063     case DataType::Type::kInt64: {
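      // The 64-bit compare is split into two 32-bit compares: the high words
      // are compared first (signed); only if they are equal does the result
      // depend on the low words, which are then compared as unsigned values
      // (hence less_cond is switched to kBelow below).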
5064       Register left_low = left.AsRegisterPairLow<Register>();
5065       Register left_high = left.AsRegisterPairHigh<Register>();
5066       int32_t val_low = 0;
5067       int32_t val_high = 0;
5068       bool right_is_const = false;
5069 
5070       if (right.IsConstant()) {
5071         DCHECK(right.GetConstant()->IsLongConstant());
5072         right_is_const = true;
5073         int64_t val = right.GetConstant()->AsLongConstant()->GetValue();
5074         val_low = Low32Bits(val);
5075         val_high = High32Bits(val);
5076       }
5077 
5078       if (right.IsRegisterPair()) {
5079         __ cmpl(left_high, right.AsRegisterPairHigh<Register>());
5080       } else if (right.IsDoubleStackSlot()) {
5081         __ cmpl(left_high, Address(ESP, right.GetHighStackIndex(kX86WordSize)));
5082       } else {
5083         DCHECK(right_is_const) << right;
5084         codegen_->Compare32BitValue(left_high, val_high);
5085       }
5086       __ j(kLess, &less);  // Signed compare.
5087       __ j(kGreater, &greater);  // Signed compare.
5088       if (right.IsRegisterPair()) {
5089         __ cmpl(left_low, right.AsRegisterPairLow<Register>());
5090       } else if (right.IsDoubleStackSlot()) {
5091         __ cmpl(left_low, Address(ESP, right.GetStackIndex()));
5092       } else {
5093         DCHECK(right_is_const) << right;
5094         codegen_->Compare32BitValue(left_low, val_low);
5095       }
5096       less_cond = kBelow;  // for CF (unsigned).
5097       break;
5098     }
5099     case DataType::Type::kFloat32: {
5100       GenerateFPCompare(left, right, compare, false);
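      // An unordered result means at least one operand is NaN; the compare's
      // bias decides whether that counts as "greater" or as "less".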
5101       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
5102       less_cond = kBelow;  // for CF (floats).
5103       break;
5104     }
5105     case DataType::Type::kFloat64: {
5106       GenerateFPCompare(left, right, compare, true);
5107       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
5108       less_cond = kBelow;  // for CF (floats).
5109       break;
5110     }
5111     default:
5112       LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
5113   }
5114 
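  // Materialize the -1/0/1 result: start from 0, exit early on equality and
  // otherwise fall through to the greater/less blocks below.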
5115   __ movl(out, Immediate(0));
5116   __ j(kEqual, &done);
5117   __ j(less_cond, &less);
5118 
5119   __ Bind(&greater);
5120   __ movl(out, Immediate(1));
5121   __ jmp(&done);
5122 
5123   __ Bind(&less);
5124   __ movl(out, Immediate(-1));
5125 
5126   __ Bind(&done);
5127 }
5128 
5129 void LocationsBuilderX86::VisitPhi(HPhi* instruction) {
5130   LocationSummary* locations =
5131       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5132   for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
5133     locations->SetInAt(i, Location::Any());
5134   }
5135   locations->SetOut(Location::Any());
5136 }
5137 
5138 void InstructionCodeGeneratorX86::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
5139   LOG(FATAL) << "Unreachable";
5140 }
5141 
5142 void CodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) {
5143   /*
5144    * According to the JSR-133 Cookbook, on x86 only StoreLoad/AnyAny barriers need a memory fence.
5145    * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model.
5146    * For those cases, all we need to ensure is that there is a scheduling barrier in place.
5147    */
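  // Note: the fence itself (MemoryFence()) is emitted as a locked read-modify-write
  // on the top of the stack, with an mfence used for the non-temporal case.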
5148   switch (kind) {
5149     case MemBarrierKind::kAnyAny: {
5150       MemoryFence();
5151       break;
5152     }
5153     case MemBarrierKind::kAnyStore:
5154     case MemBarrierKind::kLoadAny:
5155     case MemBarrierKind::kStoreStore: {
5156       // nop
5157       break;
5158     }
5159     case MemBarrierKind::kNTStoreStore:
5160       // Non-Temporal Store/Store needs an explicit fence.
5161       MemoryFence(/* non-temporal= */ true);
5162       break;
5163   }
5164 }
5165 
5166 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86::GetSupportedInvokeStaticOrDirectDispatch(
5167       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
5168       ArtMethod* method ATTRIBUTE_UNUSED) {
5169   return desired_dispatch_info;
5170 }
5171 
5172 Register CodeGeneratorX86::GetInvokeExtraParameter(HInvoke* invoke, Register temp) {
5173   if (invoke->IsInvokeStaticOrDirect()) {
5174     return GetInvokeStaticOrDirectExtraParameter(invoke->AsInvokeStaticOrDirect(), temp);
5175   }
5176   DCHECK(invoke->IsInvokeInterface());
5177   Location location =
5178       invoke->GetLocations()->InAt(invoke->AsInvokeInterface()->GetSpecialInputIndex());
5179   return location.AsRegister<Register>();
5180 }
5181 
5182 Register CodeGeneratorX86::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
5183                                                                  Register temp) {
5184   Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
5185   if (!invoke->GetLocations()->Intrinsified()) {
5186     return location.AsRegister<Register>();
5187   }
5188   // For intrinsics we allow any location, so it may be on the stack.
5189   if (!location.IsRegister()) {
5190     __ movl(temp, Address(ESP, location.GetStackIndex()));
5191     return temp;
5192   }
5193   // For register locations, check if the register was saved. If so, get it from the stack.
5194   // Note: There is a chance that the register was saved but not overwritten, so we could
5195   // save one load. However, since this is just an intrinsic slow path we prefer this
5196   // simple and more robust approach rather than trying to determine if that's the case.
5197   SlowPathCode* slow_path = GetCurrentSlowPath();
5198   DCHECK(slow_path != nullptr);  // For intrinsified invokes the call is emitted on the slow path.
5199   if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
5200     int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
5201     __ movl(temp, Address(ESP, stack_offset));
5202     return temp;
5203   }
5204   return location.AsRegister<Register>();
5205 }
5206 
5207 void CodeGeneratorX86::LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke) {
5208   switch (load_kind) {
5209     case MethodLoadKind::kBootImageLinkTimePcRelative: {
5210       DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
5211       Register base_reg = GetInvokeExtraParameter(invoke, temp.AsRegister<Register>());
5212       __ leal(temp.AsRegister<Register>(),
5213               Address(base_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
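      // 32-bit x86 has no PC-relative addressing, so the address is formed from
      // the HX86ComputeBaseMethodAddress base register plus a placeholder
      // displacement that the linker rewrites via the patch recorded below.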
5214       RecordBootImageMethodPatch(invoke);
5215       break;
5216     }
5217     case MethodLoadKind::kBootImageRelRo: {
5218       size_t index = invoke->IsInvokeInterface()
5219           ? invoke->AsInvokeInterface()->GetSpecialInputIndex()
5220           : invoke->AsInvokeStaticOrDirect()->GetSpecialInputIndex();
5221       Register base_reg = GetInvokeExtraParameter(invoke, temp.AsRegister<Register>());
5222       __ movl(temp.AsRegister<Register>(), Address(base_reg, kPlaceholder32BitOffset));
5223       RecordBootImageRelRoPatch(
5224           invoke->InputAt(index)->AsX86ComputeBaseMethodAddress(),
5225           GetBootImageOffset(invoke));
5226       break;
5227     }
5228     case MethodLoadKind::kBssEntry: {
5229       Register base_reg = GetInvokeExtraParameter(invoke, temp.AsRegister<Register>());
5230       __ movl(temp.AsRegister<Register>(), Address(base_reg, kPlaceholder32BitOffset));
5231       RecordMethodBssEntryPatch(invoke);
5232       // No need for memory fence, thanks to the x86 memory model.
5233       break;
5234     }
5235     case MethodLoadKind::kJitDirectAddress: {
5236       __ movl(temp.AsRegister<Register>(),
5237               Immediate(reinterpret_cast32<uint32_t>(invoke->GetResolvedMethod())));
5238       break;
5239     }
5240     case MethodLoadKind::kRuntimeCall: {
5241       // Test situation, don't do anything.
5242       break;
5243     }
5244     default: {
5245       LOG(FATAL) << "Load kind should have already been handled " << load_kind;
5246       UNREACHABLE();
5247     }
5248   }
5249 }
5250 
5251 void CodeGeneratorX86::GenerateStaticOrDirectCall(
5252     HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
5253   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
5254   switch (invoke->GetMethodLoadKind()) {
5255     case MethodLoadKind::kStringInit: {
5256       // temp = thread->string_init_entrypoint
5257       uint32_t offset =
5258           GetThreadOffset<kX86PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
5259       __ fs()->movl(temp.AsRegister<Register>(), Address::Absolute(offset));
5260       break;
5261     }
5262     case MethodLoadKind::kRecursive: {
5263       callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex());
5264       break;
5265     }
5266     case MethodLoadKind::kRuntimeCall: {
5267       GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
5268       return;  // No code pointer retrieval; the runtime performs the call directly.
5269     }
5270     case MethodLoadKind::kBootImageLinkTimePcRelative:
5271       // For kCallCriticalNative we skip loading the method and do the call directly.
5272       if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
5273         break;
5274       }
5275       FALLTHROUGH_INTENDED;
5276     default: {
5277       LoadMethod(invoke->GetMethodLoadKind(), callee_method, invoke);
5278     }
5279   }
5280 
5281   switch (invoke->GetCodePtrLocation()) {
5282     case CodePtrLocation::kCallSelf:
5283       __ call(GetFrameEntryLabel());
5284       RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
5285       break;
5286     case CodePtrLocation::kCallCriticalNative: {
5287       size_t out_frame_size =
5288           PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorX86,
5289                                     kNativeStackAlignment,
5290                                     GetCriticalNativeDirectCallFrameSize>(invoke);
5291       if (invoke->GetMethodLoadKind() == MethodLoadKind::kBootImageLinkTimePcRelative) {
5292         DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
5293         Register base_reg = GetInvokeExtraParameter(invoke, temp.AsRegister<Register>());
5294         __ call(Address(base_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
5295         RecordBootImageJniEntrypointPatch(invoke);
5296       } else {
5297         // (callee_method + offset_of_jni_entry_point)()
5298         __ call(Address(callee_method.AsRegister<Register>(),
5299                         ArtMethod::EntryPointFromJniOffset(kX86PointerSize).Int32Value()));
5300       }
5301       RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
5302       if (out_frame_size == 0u && DataType::IsFloatingPointType(invoke->GetType())) {
5303         // Create space for conversion.
5304         out_frame_size = 8u;
5305         IncreaseFrame(out_frame_size);
5306       }
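      // The native (cdecl) ABI returns float/double in x87 ST(0) whereas the
      // managed ABI expects XMM0, so the fstps/fstpl + movss/movsd pairs below
      // bounce the value through the stack space reserved above.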
5307       // Zero-/sign-extend or move the result when needed due to native and managed ABI mismatch.
5308       switch (invoke->GetType()) {
5309         case DataType::Type::kBool:
5310           __ movzxb(EAX, AL);
5311           break;
5312         case DataType::Type::kInt8:
5313           __ movsxb(EAX, AL);
5314           break;
5315         case DataType::Type::kUint16:
5316           __ movzxw(EAX, EAX);
5317           break;
5318         case DataType::Type::kInt16:
5319           __ movsxw(EAX, EAX);
5320           break;
5321         case DataType::Type::kFloat32:
5322           __ fstps(Address(ESP, 0));
5323           __ movss(XMM0, Address(ESP, 0));
5324           break;
5325         case DataType::Type::kFloat64:
5326           __ fstpl(Address(ESP, 0));
5327           __ movsd(XMM0, Address(ESP, 0));
5328           break;
5329         case DataType::Type::kInt32:
5330         case DataType::Type::kInt64:
5331         case DataType::Type::kVoid:
5332           break;
5333         default:
5334           DCHECK(false) << invoke->GetType();
5335           break;
5336       }
5337       if (out_frame_size != 0u) {
5338         DecreaseFrame(out_frame_size);
5339       }
5340       break;
5341     }
5342     case CodePtrLocation::kCallArtMethod:
5343       // (callee_method + offset_of_quick_compiled_code)()
5344       __ call(Address(callee_method.AsRegister<Register>(),
5345                       ArtMethod::EntryPointFromQuickCompiledCodeOffset(
5346                           kX86PointerSize).Int32Value()));
5347       RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
5348       break;
5349   }
5350 
5351   DCHECK(!IsLeafMethod());
5352 }
5353 
5354 void CodeGeneratorX86::GenerateVirtualCall(
5355     HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
5356   Register temp = temp_in.AsRegister<Register>();
5357   uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
5358       invoke->GetVTableIndex(), kX86PointerSize).Uint32Value();
5359 
5360   // Use the calling convention instead of the location of the receiver, as
5361   // intrinsics may have put the receiver in a different register. In the intrinsics
5362   // slow path, the arguments have been moved to the right place, so here we are
5363   // guaranteed that the receiver is the first register of the calling convention.
5364   InvokeDexCallingConvention calling_convention;
5365   Register receiver = calling_convention.GetRegisterAt(0);
5366   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
5367   // /* HeapReference<Class> */ temp = receiver->klass_
5368   __ movl(temp, Address(receiver, class_offset));
5369   MaybeRecordImplicitNullCheck(invoke);
5370   // Instead of simply (possibly) unpoisoning `temp` here, we should
5371   // emit a read barrier for the previous class reference load.
5372   // However this is not required in practice, as this is an
5373   // intermediate/temporary reference and because the current
5374   // concurrent copying collector keeps the from-space memory
5375   // intact/accessible until the end of the marking phase (though the
5376   // concurrent copying collector may not do so in the future).
5377   __ MaybeUnpoisonHeapReference(temp);
5378 
5379   MaybeGenerateInlineCacheCheck(invoke, temp);
5380 
5381   // temp = temp->GetMethodAt(method_offset);
5382   __ movl(temp, Address(temp, method_offset));
5383   // call temp->GetEntryPoint();
5384   __ call(Address(
5385       temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value()));
5386   RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
5387 }
5388 
5389 void CodeGeneratorX86::RecordBootImageIntrinsicPatch(HX86ComputeBaseMethodAddress* method_address,
5390                                                      uint32_t intrinsic_data) {
5391   boot_image_other_patches_.emplace_back(
5392       method_address, /* target_dex_file= */ nullptr, intrinsic_data);
5393   __ Bind(&boot_image_other_patches_.back().label);
5394 }
5395 
5396 void CodeGeneratorX86::RecordBootImageRelRoPatch(HX86ComputeBaseMethodAddress* method_address,
5397                                                  uint32_t boot_image_offset) {
5398   boot_image_other_patches_.emplace_back(
5399       method_address, /* target_dex_file= */ nullptr, boot_image_offset);
5400   __ Bind(&boot_image_other_patches_.back().label);
5401 }
5402 
5403 void CodeGeneratorX86::RecordBootImageMethodPatch(HInvoke* invoke) {
5404   size_t index = invoke->IsInvokeInterface()
5405       ? invoke->AsInvokeInterface()->GetSpecialInputIndex()
5406       : invoke->AsInvokeStaticOrDirect()->GetSpecialInputIndex();
5407   HX86ComputeBaseMethodAddress* method_address =
5408       invoke->InputAt(index)->AsX86ComputeBaseMethodAddress();
5409   boot_image_method_patches_.emplace_back(
5410       method_address,
5411       invoke->GetResolvedMethodReference().dex_file,
5412       invoke->GetResolvedMethodReference().index);
5413   __ Bind(&boot_image_method_patches_.back().label);
5414 }
5415 
5416 void CodeGeneratorX86::RecordMethodBssEntryPatch(HInvoke* invoke) {
5417   size_t index = invoke->IsInvokeInterface()
5418       ? invoke->AsInvokeInterface()->GetSpecialInputIndex()
5419       : invoke->AsInvokeStaticOrDirect()->GetSpecialInputIndex();
5420   DCHECK(IsSameDexFile(GetGraph()->GetDexFile(), *invoke->GetMethodReference().dex_file));
5421   HX86ComputeBaseMethodAddress* method_address =
5422       invoke->InputAt(index)->AsX86ComputeBaseMethodAddress();
5423   // Add the patch entry and bind its label at the end of the instruction.
5424   method_bss_entry_patches_.emplace_back(
5425       method_address,
5426       invoke->GetMethodReference().dex_file,
5427       invoke->GetMethodReference().index);
5428   __ Bind(&method_bss_entry_patches_.back().label);
5429 }
5430 
5431 void CodeGeneratorX86::RecordBootImageTypePatch(HLoadClass* load_class) {
5432   HX86ComputeBaseMethodAddress* method_address =
5433       load_class->InputAt(0)->AsX86ComputeBaseMethodAddress();
5434   boot_image_type_patches_.emplace_back(
5435       method_address, &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
5436   __ Bind(&boot_image_type_patches_.back().label);
5437 }
5438 
5439 Label* CodeGeneratorX86::NewTypeBssEntryPatch(HLoadClass* load_class) {
5440   HX86ComputeBaseMethodAddress* method_address =
5441       load_class->InputAt(0)->AsX86ComputeBaseMethodAddress();
5442   ArenaDeque<X86PcRelativePatchInfo>* patches = nullptr;
5443   switch (load_class->GetLoadKind()) {
5444     case HLoadClass::LoadKind::kBssEntry:
5445       patches = &type_bss_entry_patches_;
5446       break;
5447     case HLoadClass::LoadKind::kBssEntryPublic:
5448       patches = &public_type_bss_entry_patches_;
5449       break;
5450     case HLoadClass::LoadKind::kBssEntryPackage:
5451       patches = &package_type_bss_entry_patches_;
5452       break;
5453     default:
5454       LOG(FATAL) << "Unexpected load kind: " << load_class->GetLoadKind();
5455       UNREACHABLE();
5456   }
5457   patches->emplace_back(
5458       method_address, &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
5459   return &patches->back().label;
5460 }
5461 
5462 void CodeGeneratorX86::RecordBootImageStringPatch(HLoadString* load_string) {
5463   HX86ComputeBaseMethodAddress* method_address =
5464       load_string->InputAt(0)->AsX86ComputeBaseMethodAddress();
5465   boot_image_string_patches_.emplace_back(
5466       method_address, &load_string->GetDexFile(), load_string->GetStringIndex().index_);
5467   __ Bind(&boot_image_string_patches_.back().label);
5468 }
5469 
5470 Label* CodeGeneratorX86::NewStringBssEntryPatch(HLoadString* load_string) {
5471   HX86ComputeBaseMethodAddress* method_address =
5472       load_string->InputAt(0)->AsX86ComputeBaseMethodAddress();
5473   string_bss_entry_patches_.emplace_back(
5474       method_address, &load_string->GetDexFile(), load_string->GetStringIndex().index_);
5475   return &string_bss_entry_patches_.back().label;
5476 }
5477 
5478 void CodeGeneratorX86::RecordBootImageJniEntrypointPatch(HInvokeStaticOrDirect* invoke) {
5479   HX86ComputeBaseMethodAddress* method_address =
5480       invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5481   boot_image_jni_entrypoint_patches_.emplace_back(
5482       method_address,
5483       invoke->GetResolvedMethodReference().dex_file,
5484       invoke->GetResolvedMethodReference().index);
5485   __ Bind(&boot_image_jni_entrypoint_patches_.back().label);
5486 }
5487 
5488 void CodeGeneratorX86::LoadBootImageAddress(Register reg,
5489                                             uint32_t boot_image_reference,
5490                                             HInvokeStaticOrDirect* invoke) {
5491   if (GetCompilerOptions().IsBootImage()) {
5492     HX86ComputeBaseMethodAddress* method_address =
5493         invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5494     DCHECK(method_address != nullptr);
5495     Register method_address_reg =
5496         invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>();
5497     __ leal(reg, Address(method_address_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
5498     RecordBootImageIntrinsicPatch(method_address, boot_image_reference);
5499   } else if (GetCompilerOptions().GetCompilePic()) {
5500     HX86ComputeBaseMethodAddress* method_address =
5501         invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5502     DCHECK(method_address != nullptr);
5503     Register method_address_reg =
5504         invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>();
5505     __ movl(reg, Address(method_address_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
5506     RecordBootImageRelRoPatch(method_address, boot_image_reference);
5507   } else {
5508     DCHECK(GetCompilerOptions().IsJitCompiler());
5509     gc::Heap* heap = Runtime::Current()->GetHeap();
5510     DCHECK(!heap->GetBootImageSpaces().empty());
5511     const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference;
5512     __ movl(reg, Immediate(dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(address))));
5513   }
5514 }
5515 
5516 void CodeGeneratorX86::LoadIntrinsicDeclaringClass(Register reg, HInvokeStaticOrDirect* invoke) {
5517   DCHECK_NE(invoke->GetIntrinsic(), Intrinsics::kNone);
5518   if (GetCompilerOptions().IsBootImage()) {
5519     // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
5520     HX86ComputeBaseMethodAddress* method_address =
5521         invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5522     DCHECK(method_address != nullptr);
5523     Register method_address_reg =
5524         invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>();
5525     __ leal(reg, Address(method_address_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
5526     MethodReference target_method = invoke->GetResolvedMethodReference();
5527     dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
5528     boot_image_type_patches_.emplace_back(method_address, target_method.dex_file, type_idx.index_);
5529     __ Bind(&boot_image_type_patches_.back().label);
5530   } else {
5531     uint32_t boot_image_offset = GetBootImageOffsetOfIntrinsicDeclaringClass(invoke);
5532     LoadBootImageAddress(reg, boot_image_offset, invoke);
5533   }
5534 }
5535 
5536 // The label points to the end of the "movl" (or similar) instruction, but the literal offset
5537 // for the patch needs to point to the embedded constant, which occupies the last 4 bytes.
5538 constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
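// For example, a pc-relative `movl reg, [base + placeholder]` binds its label
// right after the instruction; the 32-bit displacement occupies the last 4
// bytes, so the literal offset is the label position minus 4.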
5539 
5540 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
5541 inline void CodeGeneratorX86::EmitPcRelativeLinkerPatches(
5542     const ArenaDeque<X86PcRelativePatchInfo>& infos,
5543     ArenaVector<linker::LinkerPatch>* linker_patches) {
5544   for (const X86PcRelativePatchInfo& info : infos) {
5545     uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
5546     linker_patches->push_back(Factory(literal_offset,
5547                                       info.target_dex_file,
5548                                       GetMethodAddressOffset(info.method_address),
5549                                       info.offset_or_index));
5550   }
5551 }
5552 
5553 template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
5554 linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
5555                                      const DexFile* target_dex_file,
5556                                      uint32_t pc_insn_offset,
5557                                      uint32_t boot_image_offset) {
5558   DCHECK(target_dex_file == nullptr);  // Unused for these patches, should be null.
5559   return Factory(literal_offset, pc_insn_offset, boot_image_offset);
5560 }
5561 
5562 void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
5563   DCHECK(linker_patches->empty());
5564   size_t size =
5565       boot_image_method_patches_.size() +
5566       method_bss_entry_patches_.size() +
5567       boot_image_type_patches_.size() +
5568       type_bss_entry_patches_.size() +
5569       public_type_bss_entry_patches_.size() +
5570       package_type_bss_entry_patches_.size() +
5571       boot_image_string_patches_.size() +
5572       string_bss_entry_patches_.size() +
5573       boot_image_jni_entrypoint_patches_.size() +
5574       boot_image_other_patches_.size();
5575   linker_patches->reserve(size);
5576   if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) {
5577     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
5578         boot_image_method_patches_, linker_patches);
5579     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
5580         boot_image_type_patches_, linker_patches);
5581     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
5582         boot_image_string_patches_, linker_patches);
5583   } else {
5584     DCHECK(boot_image_method_patches_.empty());
5585     DCHECK(boot_image_type_patches_.empty());
5586     DCHECK(boot_image_string_patches_.empty());
5587   }
5588   if (GetCompilerOptions().IsBootImage()) {
5589     EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
5590         boot_image_other_patches_, linker_patches);
5591   } else {
5592     EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>(
5593         boot_image_other_patches_, linker_patches);
5594   }
5595   EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
5596       method_bss_entry_patches_, linker_patches);
5597   EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
5598       type_bss_entry_patches_, linker_patches);
5599   EmitPcRelativeLinkerPatches<linker::LinkerPatch::PublicTypeBssEntryPatch>(
5600       public_type_bss_entry_patches_, linker_patches);
5601   EmitPcRelativeLinkerPatches<linker::LinkerPatch::PackageTypeBssEntryPatch>(
5602       package_type_bss_entry_patches_, linker_patches);
5603   EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
5604       string_bss_entry_patches_, linker_patches);
5605   EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeJniEntrypointPatch>(
5606       boot_image_jni_entrypoint_patches_, linker_patches);
5607   DCHECK_EQ(size, linker_patches->size());
5608 }
5609 
5610 void CodeGeneratorX86::MarkGCCard(Register temp,
5611                                   Register card,
5612                                   Register object,
5613                                   Register value,
5614                                   bool value_can_be_null) {
5615   NearLabel is_null;
5616   if (value_can_be_null) {
5617     __ testl(value, value);
5618     __ j(kEqual, &is_null);
5619   }
5620   // Load the address of the card table into `card`.
5621   __ fs()->movl(card, Address::Absolute(Thread::CardTableOffset<kX86PointerSize>().Int32Value()));
5622   // Calculate the offset (in the card table) of the card corresponding to
5623   // `object`.
5624   __ movl(temp, object);
5625   __ shrl(temp, Immediate(gc::accounting::CardTable::kCardShift));
5626   // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
5627   // `object`'s card.
5628   //
5629   // Register `card` contains the address of the card table. Note that the card
5630   // table's base is biased during its creation so that it always starts at an
5631   // address whose least-significant byte is equal to `kCardDirty` (see
5632   // art::gc::accounting::CardTable::Create). Therefore the MOVB instruction
5633   // below writes the `kCardDirty` (byte) value into the `object`'s card
5634   // (located at `card + object >> kCardShift`).
5635   //
5636   // This dual use of the value in register `card` (1. to calculate the location
5637   // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
5638   // (no need to explicitly load `kCardDirty` as an immediate value).
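  // Illustrative example (assuming kCardShift == 10, i.e. 1 KiB cards): an
  // object at address 0x12345678 dirties the card byte at
  // biased_card_table_base + 0x48D15.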
5639   __ movb(Address(temp, card, TIMES_1, 0),
5640           X86ManagedRegister::FromCpuRegister(card).AsByteRegister());
5641   if (value_can_be_null) {
5642     __ Bind(&is_null);
5643   }
5644 }
5645 
5646 void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) {
5647   DCHECK(instruction->IsInstanceFieldGet() ||
5648          instruction->IsStaticFieldGet() ||
5649          instruction->IsPredicatedInstanceFieldGet());
5650 
5651   bool object_field_get_with_read_barrier =
5652       kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
5653   bool is_predicated = instruction->IsPredicatedInstanceFieldGet();
5654   LocationSummary* locations =
5655       new (GetGraph()->GetAllocator()) LocationSummary(instruction,
5656                                                        kEmitCompilerReadBarrier
5657                                                            ? LocationSummary::kCallOnSlowPath
5658                                                            : LocationSummary::kNoCall);
5659   if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
5660     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
5661   }
5662   // receiver_input
5663   locations->SetInAt(is_predicated ? 1 : 0, Location::RequiresRegister());
5664   if (is_predicated) {
5665     if (DataType::IsFloatingPointType(instruction->GetType())) {
5666       locations->SetInAt(0, Location::RequiresFpuRegister());
5667     } else {
5668       locations->SetInAt(0, Location::RequiresRegister());
5669     }
5670   }
5671   if (DataType::IsFloatingPointType(instruction->GetType())) {
5672     locations->SetOut(is_predicated ? Location::SameAsFirstInput()
5673                                     : Location::RequiresFpuRegister());
5674   } else {
5675     // The output overlaps in case of long: we don't want the low move
5676     // to overwrite the object's location.  Likewise, in the case of
5677     // an object field get with read barriers enabled, we do not want
5678     // the move to overwrite the object's location, as we need it to emit
5679     // the read barrier.
5680     locations->SetOut(is_predicated ? Location::SameAsFirstInput() : Location::RequiresRegister(),
5681                       (object_field_get_with_read_barrier ||
5682                        instruction->GetType() == DataType::Type::kInt64 ||
5683                        is_predicated)
5684                           ? Location::kOutputOverlap
5685                           : Location::kNoOutputOverlap);
5686   }
5687 
5688   if (field_info.IsVolatile() && (field_info.GetFieldType() == DataType::Type::kInt64)) {
5689     // Long values can be loaded atomically into an XMM using movsd.
5690     // So we use an XMM register as a temp to achieve atomicity (first
5691     // load the temp into the XMM and then copy the XMM into the
5692     // output, 32 bits at a time).
5693     locations->AddTemp(Location::RequiresFpuRegister());
5694   }
5695 }
5696 
5697 void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction,
5698                                                  const FieldInfo& field_info) {
5699   DCHECK(instruction->IsInstanceFieldGet() ||
5700          instruction->IsStaticFieldGet() ||
5701          instruction->IsPredicatedInstanceFieldGet());
5702 
5703   LocationSummary* locations = instruction->GetLocations();
5704   Location base_loc = locations->InAt(instruction->IsPredicatedInstanceFieldGet() ? 1 : 0);
5705   Register base = base_loc.AsRegister<Register>();
5706   Location out = locations->Out();
5707   bool is_volatile = field_info.IsVolatile();
5708   DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
5709   DataType::Type load_type = instruction->GetType();
5710   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
5711 
5712   switch (load_type) {
5713     case DataType::Type::kBool:
5714     case DataType::Type::kUint8: {
5715       __ movzxb(out.AsRegister<Register>(), Address(base, offset));
5716       break;
5717     }
5718 
5719     case DataType::Type::kInt8: {
5720       __ movsxb(out.AsRegister<Register>(), Address(base, offset));
5721       break;
5722     }
5723 
5724     case DataType::Type::kUint16: {
5725       __ movzxw(out.AsRegister<Register>(), Address(base, offset));
5726       break;
5727     }
5728 
5729     case DataType::Type::kInt16: {
5730       __ movsxw(out.AsRegister<Register>(), Address(base, offset));
5731       break;
5732     }
5733 
5734     case DataType::Type::kInt32:
5735       __ movl(out.AsRegister<Register>(), Address(base, offset));
5736       break;
5737 
5738     case DataType::Type::kReference: {
5739       // /* HeapReference<Object> */ out = *(base + offset)
5740       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
5741         // Note that a potential implicit null check is handled in this
5742         // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call.
5743         codegen_->GenerateFieldLoadWithBakerReadBarrier(
5744             instruction, out, base, offset, /* needs_null_check= */ true);
5745         if (is_volatile) {
5746           codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5747         }
5748       } else {
5749         __ movl(out.AsRegister<Register>(), Address(base, offset));
5750         codegen_->MaybeRecordImplicitNullCheck(instruction);
5751         if (is_volatile) {
5752           codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5753         }
5754         // If read barriers are enabled, emit read barriers other than
5755         // Baker's using a slow path (and also unpoison the loaded
5756         // reference, if heap poisoning is enabled).
5757         codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
5758       }
5759       break;
5760     }
5761 
5762     case DataType::Type::kInt64: {
5763       if (is_volatile) {
5764         XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
5765         __ movsd(temp, Address(base, offset));
5766         codegen_->MaybeRecordImplicitNullCheck(instruction);
5767         __ movd(out.AsRegisterPairLow<Register>(), temp);
5768         __ psrlq(temp, Immediate(32));
5769         __ movd(out.AsRegisterPairHigh<Register>(), temp);
5770       } else {
5771         DCHECK_NE(base, out.AsRegisterPairLow<Register>());
5772         __ movl(out.AsRegisterPairLow<Register>(), Address(base, offset));
5773         codegen_->MaybeRecordImplicitNullCheck(instruction);
5774         __ movl(out.AsRegisterPairHigh<Register>(), Address(base, kX86WordSize + offset));
5775       }
5776       break;
5777     }
5778 
5779     case DataType::Type::kFloat32: {
5780       __ movss(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
5781       break;
5782     }
5783 
5784     case DataType::Type::kFloat64: {
5785       __ movsd(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
5786       break;
5787     }
5788 
5789     case DataType::Type::kUint32:
5790     case DataType::Type::kUint64:
5791     case DataType::Type::kVoid:
5792       LOG(FATAL) << "Unreachable type " << load_type;
5793       UNREACHABLE();
5794   }
5795 
5796   if (load_type == DataType::Type::kReference || load_type == DataType::Type::kInt64) {
5797     // Potential implicit null checks, in the case of reference or
5798     // long fields, are handled in the previous switch statement.
5799   } else {
5800     codegen_->MaybeRecordImplicitNullCheck(instruction);
5801   }
5802 
5803   if (is_volatile) {
5804     if (load_type == DataType::Type::kReference) {
5805       // Memory barriers, in the case of references, are also handled
5806       // in the previous switch statement.
5807     } else {
5808       codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5809     }
5810   }
5811 }
5812 
5813 void LocationsBuilderX86::HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info) {
5814   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
5815 
5816   LocationSummary* locations =
5817       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5818   locations->SetInAt(0, Location::RequiresRegister());
5819   bool is_volatile = field_info.IsVolatile();
5820   DataType::Type field_type = field_info.GetFieldType();
5821   bool is_byte_type = DataType::Size(field_type) == 1u;
5822 
5823   // The register allocator does not support multiple
5824   // inputs that die at entry with one in a specific register.
5825   if (is_byte_type) {
5826     // Ensure the value is in a byte register.
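    // (On x86-32 only EAX/EBX/ECX/EDX have byte sub-registers, so EAX is a
    // convenient fixed choice for the byte store.)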
5827     locations->SetInAt(1, Location::RegisterLocation(EAX));
5828   } else if (DataType::IsFloatingPointType(field_type)) {
5829     if (is_volatile && field_type == DataType::Type::kFloat64) {
5830       // In order to satisfy the semantics of volatile, this must be a single instruction store.
5831       locations->SetInAt(1, Location::RequiresFpuRegister());
5832     } else {
5833       locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
5834     }
5835   } else if (is_volatile && field_type == DataType::Type::kInt64) {
5836     // In order to satisfy the semantics of volatile, this must be a single instruction store.
5837     locations->SetInAt(1, Location::RequiresRegister());
5838 
5839     // A 64-bit value can be written atomically to an address with movsd from an XMM register.
5840     // We need two XMM registers because there's no easier way to (bit) copy a register pair
5841     // into a single XMM register (we copy each pair part into the XMMs and then interleave them).
5842     // NB: We could make the register allocator understand fp_reg <-> core_reg moves but given the
5843     // isolated cases when we need this it isn't worth adding the extra complexity.
5844     locations->AddTemp(Location::RequiresFpuRegister());
5845     locations->AddTemp(Location::RequiresFpuRegister());
5846   } else {
5847     locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5848 
5849     if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
5850       // Temporary registers for the write barrier.
5851       locations->AddTemp(Location::RequiresRegister());  // May be used for reference poisoning too.
5852       // Ensure the card is in a byte register.
5853       locations->AddTemp(Location::RegisterLocation(ECX));
5854     }
5855   }
5856 }
5857 
5858 void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction,
5859                                                  uint32_t value_index,
5860                                                  DataType::Type field_type,
5861                                                  Address field_addr,
5862                                                  Register base,
5863                                                  bool is_volatile,
5864                                                  bool value_can_be_null) {
5865   LocationSummary* locations = instruction->GetLocations();
5866   Location value = locations->InAt(value_index);
5867   bool needs_write_barrier =
5868       CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(value_index));
5869 
5870   if (is_volatile) {
5871     codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
5872   }
5873 
5874   bool maybe_record_implicit_null_check_done = false;
5875 
5876   switch (field_type) {
5877     case DataType::Type::kBool:
5878     case DataType::Type::kUint8:
5879     case DataType::Type::kInt8: {
5880       if (value.IsConstant()) {
5881         __ movb(field_addr, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
5882       } else {
5883         __ movb(field_addr, value.AsRegister<ByteRegister>());
5884       }
5885       break;
5886     }
5887 
5888     case DataType::Type::kUint16:
5889     case DataType::Type::kInt16: {
5890       if (value.IsConstant()) {
5891         __ movw(field_addr, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
5892       } else {
5893         __ movw(field_addr, value.AsRegister<Register>());
5894       }
5895       break;
5896     }
5897 
5898     case DataType::Type::kInt32:
5899     case DataType::Type::kReference: {
5900       if (kPoisonHeapReferences && needs_write_barrier) {
5901         // Note that in the case where `value` is a null reference,
5902         // we do not enter this block, as the reference does not
5903         // need poisoning.
5904         DCHECK_EQ(field_type, DataType::Type::kReference);
5905         Register temp = locations->GetTemp(0).AsRegister<Register>();
5906         __ movl(temp, value.AsRegister<Register>());
5907         __ PoisonHeapReference(temp);
5908         __ movl(field_addr, temp);
5909       } else if (value.IsConstant()) {
5910         int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
5911         __ movl(field_addr, Immediate(v));
5912       } else {
5913         DCHECK(value.IsRegister()) << value;
5914         __ movl(field_addr, value.AsRegister<Register>());
5915       }
5916       break;
5917     }
5918 
5919     case DataType::Type::kInt64: {
5920       if (is_volatile) {
5921         XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
5922         XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
5923         __ movd(temp1, value.AsRegisterPairLow<Register>());
5924         __ movd(temp2, value.AsRegisterPairHigh<Register>());
5925         __ punpckldq(temp1, temp2);
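        // punpckldq interleaves the low dwords, leaving temp1 = {low, high, ...},
        // so the following movsd writes the full 64-bit value in one access.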
5926         __ movsd(field_addr, temp1);
5927         codegen_->MaybeRecordImplicitNullCheck(instruction);
5928       } else if (value.IsConstant()) {
5929         int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
5930         __ movl(field_addr, Immediate(Low32Bits(v)));
5931         codegen_->MaybeRecordImplicitNullCheck(instruction);
5932         __ movl(field_addr.displaceBy(kX86WordSize), Immediate(High32Bits(v)));
5933       } else {
5934         __ movl(field_addr, value.AsRegisterPairLow<Register>());
5935         codegen_->MaybeRecordImplicitNullCheck(instruction);
5936         __ movl(field_addr.displaceBy(kX86WordSize), value.AsRegisterPairHigh<Register>());
5937       }
5938       maybe_record_implicit_null_check_done = true;
5939       break;
5940     }
5941 
5942     case DataType::Type::kFloat32: {
5943       if (value.IsConstant()) {
5944         int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
5945         __ movl(field_addr, Immediate(v));
5946       } else {
5947         __ movss(field_addr, value.AsFpuRegister<XmmRegister>());
5948       }
5949       break;
5950     }
5951 
5952     case DataType::Type::kFloat64: {
5953       if (value.IsConstant()) {
5954         DCHECK(!is_volatile);
5955         int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
5956         __ movl(field_addr, Immediate(Low32Bits(v)));
5957         codegen_->MaybeRecordImplicitNullCheck(instruction);
5958         __ movl(field_addr.displaceBy(kX86WordSize), Immediate(High32Bits(v)));
5959         maybe_record_implicit_null_check_done = true;
5960       } else {
5961         __ movsd(field_addr, value.AsFpuRegister<XmmRegister>());
5962       }
5963       break;
5964     }
5965 
5966     case DataType::Type::kUint32:
5967     case DataType::Type::kUint64:
5968     case DataType::Type::kVoid:
5969       LOG(FATAL) << "Unreachable type " << field_type;
5970       UNREACHABLE();
5971   }
5972 
5973   if (!maybe_record_implicit_null_check_done) {
5974     codegen_->MaybeRecordImplicitNullCheck(instruction);
5975   }
5976 
5977   if (needs_write_barrier) {
5978     Register temp = locations->GetTemp(0).AsRegister<Register>();
5979     Register card = locations->GetTemp(1).AsRegister<Register>();
5980     codegen_->MarkGCCard(temp, card, base, value.AsRegister<Register>(), value_can_be_null);
5981   }
5982 
5983   if (is_volatile) {
5984     codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
5985   }
5986 }
5987 
5988 void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction,
5989                                                  const FieldInfo& field_info,
5990                                                  bool value_can_be_null) {
5991   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
5992 
5993   LocationSummary* locations = instruction->GetLocations();
5994   Register base = locations->InAt(0).AsRegister<Register>();
5995   bool is_volatile = field_info.IsVolatile();
5996   DataType::Type field_type = field_info.GetFieldType();
5997   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
5998   bool is_predicated =
5999       instruction->IsInstanceFieldSet() && instruction->AsInstanceFieldSet()->GetIsPredicatedSet();
6000 
6001   Address field_addr(base, offset);
6002 
6003   NearLabel pred_is_null;
6004   if (is_predicated) {
6005     __ testl(base, base);
6006     __ j(kEqual, &pred_is_null);
6007   }
6008 
6009   HandleFieldSet(instruction,
6010                  /* value_index= */ 1,
6011                  field_type,
6012                  field_addr,
6013                  base,
6014                  is_volatile,
6015                  value_can_be_null);
6016 
6017   if (is_predicated) {
6018     __ Bind(&pred_is_null);
6019   }
6020 }
6021 
6022 void LocationsBuilderX86::VisitStaticFieldGet(HStaticFieldGet* instruction) {
6023   HandleFieldGet(instruction, instruction->GetFieldInfo());
6024 }
6025 
6026 void InstructionCodeGeneratorX86::VisitStaticFieldGet(HStaticFieldGet* instruction) {
6027   HandleFieldGet(instruction, instruction->GetFieldInfo());
6028 }
6029 
6030 void LocationsBuilderX86::VisitStaticFieldSet(HStaticFieldSet* instruction) {
6031   HandleFieldSet(instruction, instruction->GetFieldInfo());
6032 }
6033 
6034 void InstructionCodeGeneratorX86::VisitStaticFieldSet(HStaticFieldSet* instruction) {
6035   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
6036 }
6037 
6038 void LocationsBuilderX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
6039   HandleFieldSet(instruction, instruction->GetFieldInfo());
6040 }
6041 
6042 void InstructionCodeGeneratorX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
6043   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
6044 }
6045 
6046 void LocationsBuilderX86::VisitPredicatedInstanceFieldGet(
6047     HPredicatedInstanceFieldGet* instruction) {
6048   HandleFieldGet(instruction, instruction->GetFieldInfo());
6049 }
6050 
6051 void LocationsBuilderX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
6052   HandleFieldGet(instruction, instruction->GetFieldInfo());
6053 }
6054 
6055 void InstructionCodeGeneratorX86::VisitPredicatedInstanceFieldGet(
6056     HPredicatedInstanceFieldGet* instruction) {
6057   NearLabel finish;
6058   LocationSummary* locations = instruction->GetLocations();
6059   Register recv = locations->InAt(1).AsRegister<Register>();
6060   __ testl(recv, recv);
6061   __ j(kZero, &finish);
6062   HandleFieldGet(instruction, instruction->GetFieldInfo());
6063   __ Bind(&finish);
6064 }
6065 void InstructionCodeGeneratorX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
6066   HandleFieldGet(instruction, instruction->GetFieldInfo());
6067 }
6068 
6069 void LocationsBuilderX86::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
6070   codegen_->CreateStringBuilderAppendLocations(instruction, Location::RegisterLocation(EAX));
6071 }
6072 
6073 void InstructionCodeGeneratorX86::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
6074   __ movl(EAX, Immediate(instruction->GetFormat()->GetValue()));
6075   codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc());
6076 }
6077 
6078 void LocationsBuilderX86::VisitUnresolvedInstanceFieldGet(
6079     HUnresolvedInstanceFieldGet* instruction) {
6080   FieldAccessCallingConventionX86 calling_convention;
6081   codegen_->CreateUnresolvedFieldLocationSummary(
6082       instruction, instruction->GetFieldType(), calling_convention);
6083 }
6084 
6085 void InstructionCodeGeneratorX86::VisitUnresolvedInstanceFieldGet(
6086     HUnresolvedInstanceFieldGet* instruction) {
6087   FieldAccessCallingConventionX86 calling_convention;
6088   codegen_->GenerateUnresolvedFieldAccess(instruction,
6089                                           instruction->GetFieldType(),
6090                                           instruction->GetFieldIndex(),
6091                                           instruction->GetDexPc(),
6092                                           calling_convention);
6093 }
6094 
6095 void LocationsBuilderX86::VisitUnresolvedInstanceFieldSet(
6096     HUnresolvedInstanceFieldSet* instruction) {
6097   FieldAccessCallingConventionX86 calling_convention;
6098   codegen_->CreateUnresolvedFieldLocationSummary(
6099       instruction, instruction->GetFieldType(), calling_convention);
6100 }
6101 
6102 void InstructionCodeGeneratorX86::VisitUnresolvedInstanceFieldSet(
6103     HUnresolvedInstanceFieldSet* instruction) {
6104   FieldAccessCallingConventionX86 calling_convention;
6105   codegen_->GenerateUnresolvedFieldAccess(instruction,
6106                                           instruction->GetFieldType(),
6107                                           instruction->GetFieldIndex(),
6108                                           instruction->GetDexPc(),
6109                                           calling_convention);
6110 }
6111 
6112 void LocationsBuilderX86::VisitUnresolvedStaticFieldGet(
6113     HUnresolvedStaticFieldGet* instruction) {
6114   FieldAccessCallingConventionX86 calling_convention;
6115   codegen_->CreateUnresolvedFieldLocationSummary(
6116       instruction, instruction->GetFieldType(), calling_convention);
6117 }
6118 
6119 void InstructionCodeGeneratorX86::VisitUnresolvedStaticFieldGet(
6120     HUnresolvedStaticFieldGet* instruction) {
6121   FieldAccessCallingConventionX86 calling_convention;
6122   codegen_->GenerateUnresolvedFieldAccess(instruction,
6123                                           instruction->GetFieldType(),
6124                                           instruction->GetFieldIndex(),
6125                                           instruction->GetDexPc(),
6126                                           calling_convention);
6127 }
6128 
6129 void LocationsBuilderX86::VisitUnresolvedStaticFieldSet(
6130     HUnresolvedStaticFieldSet* instruction) {
6131   FieldAccessCallingConventionX86 calling_convention;
6132   codegen_->CreateUnresolvedFieldLocationSummary(
6133       instruction, instruction->GetFieldType(), calling_convention);
6134 }
6135 
6136 void InstructionCodeGeneratorX86::VisitUnresolvedStaticFieldSet(
6137     HUnresolvedStaticFieldSet* instruction) {
6138   FieldAccessCallingConventionX86 calling_convention;
6139   codegen_->GenerateUnresolvedFieldAccess(instruction,
6140                                           instruction->GetFieldType(),
6141                                           instruction->GetFieldIndex(),
6142                                           instruction->GetDexPc(),
6143                                           calling_convention);
6144 }
6145 
6146 void LocationsBuilderX86::VisitNullCheck(HNullCheck* instruction) {
6147   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
6148   Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks()
6149       ? Location::RequiresRegister()
6150       : Location::Any();
6151   locations->SetInAt(0, loc);
6152 }
6153 
6154 void CodeGeneratorX86::GenerateImplicitNullCheck(HNullCheck* instruction) {
6155   if (CanMoveNullCheckToUser(instruction)) {
6156     return;
6157   }
6158   LocationSummary* locations = instruction->GetLocations();
6159   Location obj = locations->InAt(0);
6160 
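  // The load below faults when `obj` is null; the fault handler uses the PC
  // recorded here to turn the SIGSEGV into a NullPointerException. testl only
  // reads memory, so EAX is not modified.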
6161   __ testl(EAX, Address(obj.AsRegister<Register>(), 0));
6162   RecordPcInfo(instruction, instruction->GetDexPc());
6163 }
6164 
6165 void CodeGeneratorX86::GenerateExplicitNullCheck(HNullCheck* instruction) {
6166   SlowPathCode* slow_path = new (GetScopedAllocator()) NullCheckSlowPathX86(instruction);
6167   AddSlowPath(slow_path);
6168 
6169   LocationSummary* locations = instruction->GetLocations();
6170   Location obj = locations->InAt(0);
6171 
6172   if (obj.IsRegister()) {
6173     __ testl(obj.AsRegister<Register>(), obj.AsRegister<Register>());
6174   } else if (obj.IsStackSlot()) {
6175     __ cmpl(Address(ESP, obj.GetStackIndex()), Immediate(0));
6176   } else {
6177     DCHECK(obj.IsConstant()) << obj;
6178     DCHECK(obj.GetConstant()->IsNullConstant());
6179     __ jmp(slow_path->GetEntryLabel());
6180     return;
6181   }
6182   __ j(kEqual, slow_path->GetEntryLabel());
6183 }
6184 
6185 void InstructionCodeGeneratorX86::VisitNullCheck(HNullCheck* instruction) {
6186   codegen_->GenerateNullCheck(instruction);
6187 }
6188 
6189 void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) {
6190   bool object_array_get_with_read_barrier =
6191       kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
6192   LocationSummary* locations =
6193       new (GetGraph()->GetAllocator()) LocationSummary(instruction,
6194                                                        object_array_get_with_read_barrier
6195                                                            ? LocationSummary::kCallOnSlowPath
6196                                                            : LocationSummary::kNoCall);
6197   if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
6198     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
6199   }
6200   locations->SetInAt(0, Location::RequiresRegister());
6201   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6202   if (DataType::IsFloatingPointType(instruction->GetType())) {
6203     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
6204   } else {
6205     // The output overlaps in case of long: we don't want the low move
6206     // to overwrite the array's location.  Likewise, in the case of an
6207     // object array get with read barriers enabled, we do not want the
6208     // move to overwrite the array's location, as we need it to emit
6209     // the read barrier.
6210     locations->SetOut(
6211         Location::RequiresRegister(),
6212         (instruction->GetType() == DataType::Type::kInt64 || object_array_get_with_read_barrier)
6213             ? Location::kOutputOverlap
6214             : Location::kNoOutputOverlap);
6215   }
6216 }
6217 
6218 void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
6219   LocationSummary* locations = instruction->GetLocations();
6220   Location obj_loc = locations->InAt(0);
6221   Register obj = obj_loc.AsRegister<Register>();
6222   Location index = locations->InAt(1);
6223   Location out_loc = locations->Out();
6224   uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
6225 
6226   DataType::Type type = instruction->GetType();
6227   switch (type) {
6228     case DataType::Type::kBool:
6229     case DataType::Type::kUint8: {
6230       Register out = out_loc.AsRegister<Register>();
6231       __ movzxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset));
6232       break;
6233     }
6234 
6235     case DataType::Type::kInt8: {
6236       Register out = out_loc.AsRegister<Register>();
6237       __ movsxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset));
6238       break;
6239     }
6240 
6241     case DataType::Type::kUint16: {
6242       Register out = out_loc.AsRegister<Register>();
6243       if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
6244         // Branch into the compressed or uncompressed case based on the string's compression flag.
6245         uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
6246         NearLabel done, not_compressed;
6247         __ testb(Address(obj, count_offset), Immediate(1));
6248         codegen_->MaybeRecordImplicitNullCheck(instruction);
6249         static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
6250                       "Expecting 0=compressed, 1=uncompressed");
6251         __ j(kNotZero, &not_compressed);
6252         __ movzxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset));
6253         __ jmp(&done);
6254         __ Bind(&not_compressed);
6255         __ movzxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset));
6256         __ Bind(&done);
6257       } else {
6258         // Common case: a char[] access, or String.charAt() with string
6259         // compression disabled.
6260         __ movzxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset));
6261       }
6262       break;
6263     }
6264 
6265     case DataType::Type::kInt16: {
6266       Register out = out_loc.AsRegister<Register>();
6267       __ movsxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset));
6268       break;
6269     }
6270 
6271     case DataType::Type::kInt32: {
6272       Register out = out_loc.AsRegister<Register>();
6273       __ movl(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset));
6274       break;
6275     }
6276 
6277     case DataType::Type::kReference: {
6278       static_assert(
6279           sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
6280           "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
6281       // /* HeapReference<Object> */ out =
6282       //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
6283       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
6284         // Note that a potential implicit null check is handled in this
6285         // CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call.
6286         codegen_->GenerateArrayLoadWithBakerReadBarrier(
6287             instruction, out_loc, obj, data_offset, index, /* needs_null_check= */ true);
6288       } else {
6289         Register out = out_loc.AsRegister<Register>();
6290         __ movl(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset));
6291         codegen_->MaybeRecordImplicitNullCheck(instruction);
6292         // If read barriers are enabled, emit read barriers other than
6293         // Baker's using a slow path (and also unpoison the loaded
6294         // reference, if heap poisoning is enabled).
6295         if (index.IsConstant()) {
6296           uint32_t offset =
6297               (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
6298           codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
6299         } else {
6300           codegen_->MaybeGenerateReadBarrierSlow(
6301               instruction, out_loc, out_loc, obj_loc, data_offset, index);
6302         }
6303       }
6304       break;
6305     }
6306 
6307     case DataType::Type::kInt64: {
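      // The locations builder marked the output as overlapping for long loads, so the array
      // register cannot alias the low result register and stays valid for the high-word load.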
6308       DCHECK_NE(obj, out_loc.AsRegisterPairLow<Register>());
6309       __ movl(out_loc.AsRegisterPairLow<Register>(),
6310               CodeGeneratorX86::ArrayAddress(obj, index, TIMES_8, data_offset));
6311       codegen_->MaybeRecordImplicitNullCheck(instruction);
6312       __ movl(out_loc.AsRegisterPairHigh<Register>(),
6313               CodeGeneratorX86::ArrayAddress(obj, index, TIMES_8, data_offset + kX86WordSize));
6314       break;
6315     }
6316 
6317     case DataType::Type::kFloat32: {
6318       XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
6319       __ movss(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset));
6320       break;
6321     }
6322 
6323     case DataType::Type::kFloat64: {
6324       XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
6325       __ movsd(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_8, data_offset));
6326       break;
6327     }
6328 
6329     case DataType::Type::kUint32:
6330     case DataType::Type::kUint64:
6331     case DataType::Type::kVoid:
6332       LOG(FATAL) << "Unreachable type " << type;
6333       UNREACHABLE();
6334   }
6335 
6336   if (type == DataType::Type::kReference || type == DataType::Type::kInt64) {
6337     // Potential implicit null checks, in the case of reference or
6338     // long arrays, are handled in the previous switch statement.
6339   } else {
6340     codegen_->MaybeRecordImplicitNullCheck(instruction);
6341   }
6342 }
6343 
6344 void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) {
6345   DataType::Type value_type = instruction->GetComponentType();
6346 
6347   bool needs_write_barrier =
6348       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
6349   bool needs_type_check = instruction->NeedsTypeCheck();
6350 
6351   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6352       instruction,
6353       needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
6354 
6355   bool is_byte_type = DataType::Size(value_type) == 1u;
6356   // We need the inputs to be different from the output in the case of a long operation.
6357   // For a byte operation, the register allocator does not support multiple
6358   // inputs that die at entry with one in a specific register.
6359   locations->SetInAt(0, Location::RequiresRegister());
6360   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6361   if (is_byte_type) {
6362     // Ensure the value is in a byte register.
6363     locations->SetInAt(2, Location::ByteRegisterOrConstant(EAX, instruction->InputAt(2)));
6364   } else if (DataType::IsFloatingPointType(value_type)) {
6365     locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
6366   } else {
6367     locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
6368   }
6369   if (needs_write_barrier) {
6370     // Temporary registers for the write barrier.
6371     locations->AddTemp(Location::RequiresRegister());  // Possibly used for ref. poisoning too.
6372     // Ensure the card is in a byte register.
6373     locations->AddTemp(Location::RegisterLocation(ECX));
6374   }
6375 }
6376 
6377 void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
6378   LocationSummary* locations = instruction->GetLocations();
6379   Location array_loc = locations->InAt(0);
6380   Register array = array_loc.AsRegister<Register>();
6381   Location index = locations->InAt(1);
6382   Location value = locations->InAt(2);
6383   DataType::Type value_type = instruction->GetComponentType();
6384   bool needs_type_check = instruction->NeedsTypeCheck();
6385   bool needs_write_barrier =
6386       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
6387 
6388   switch (value_type) {
6389     case DataType::Type::kBool:
6390     case DataType::Type::kUint8:
6391     case DataType::Type::kInt8: {
6392       uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
6393       Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_1, offset);
6394       if (value.IsRegister()) {
6395         __ movb(address, value.AsRegister<ByteRegister>());
6396       } else {
6397         __ movb(address, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
6398       }
6399       codegen_->MaybeRecordImplicitNullCheck(instruction);
6400       break;
6401     }
6402 
6403     case DataType::Type::kUint16:
6404     case DataType::Type::kInt16: {
6405       uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
6406       Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_2, offset);
6407       if (value.IsRegister()) {
6408         __ movw(address, value.AsRegister<Register>());
6409       } else {
6410         __ movw(address, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
6411       }
6412       codegen_->MaybeRecordImplicitNullCheck(instruction);
6413       break;
6414     }
6415 
6416     case DataType::Type::kReference: {
6417       uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
6418       Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
6419 
6420       if (!value.IsRegister()) {
6421         // Just setting null.
6422         DCHECK(instruction->InputAt(2)->IsNullConstant());
6423         DCHECK(value.IsConstant()) << value;
6424         __ movl(address, Immediate(0));
6425         codegen_->MaybeRecordImplicitNullCheck(instruction);
6426         DCHECK(!needs_write_barrier);
6427         DCHECK(!needs_type_check);
6428         break;
6429       }
6430 
6431       DCHECK(needs_write_barrier);
6432       Register register_value = value.AsRegister<Register>();
6433       Location temp_loc = locations->GetTemp(0);
6434       Register temp = temp_loc.AsRegister<Register>();
6435 
6436       bool can_value_be_null = instruction->GetValueCanBeNull();
6437       NearLabel do_store;
6438       if (can_value_be_null) {
6439         __ testl(register_value, register_value);
6440         __ j(kEqual, &do_store);
6441       }
6442 
6443       SlowPathCode* slow_path = nullptr;
6444       if (needs_type_check) {
6445         slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathX86(instruction);
6446         codegen_->AddSlowPath(slow_path);
6447 
6448         const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
6449         const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
6450         const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
6451 
6452         // Note that when Baker read barriers are enabled, the type
6453         // checks are performed without read barriers.  This is fine,
6454         // even in the case where a class object is in the from-space
6455         // after the flip, as a comparison involving such a type would
6456         // not produce a false positive; it may of course produce a
6457         // false negative, in which case we would take the ArraySet
6458         // slow path.
6459 
6460         // /* HeapReference<Class> */ temp = array->klass_
6461         __ movl(temp, Address(array, class_offset));
6462         codegen_->MaybeRecordImplicitNullCheck(instruction);
6463         __ MaybeUnpoisonHeapReference(temp);
6464 
6465         // /* HeapReference<Class> */ temp = temp->component_type_
6466         __ movl(temp, Address(temp, component_offset));
6467         // If heap poisoning is enabled, no need to unpoison `temp`
6468         // nor the object reference in `register_value->klass`, as
6469         // we are comparing two poisoned references.
6470         __ cmpl(temp, Address(register_value, class_offset));
6471 
6472         if (instruction->StaticTypeOfArrayIsObjectArray()) {
6473           NearLabel do_put;
6474           __ j(kEqual, &do_put);
6475           // If heap poisoning is enabled, the `temp` reference has
6476           // not been unpoisoned yet; unpoison it now.
6477           __ MaybeUnpoisonHeapReference(temp);
6478 
6479           // If heap poisoning is enabled, no need to unpoison the
6480           // heap reference loaded below, as it is only used for a
6481           // comparison with null.
6482           __ cmpl(Address(temp, super_offset), Immediate(0));
6483           __ j(kNotEqual, slow_path->GetEntryLabel());
6484           __ Bind(&do_put);
6485         } else {
6486           __ j(kNotEqual, slow_path->GetEntryLabel());
6487         }
6488       }
6489 
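      // A null value branches directly to `do_store` above, so the value is known to be
      // non-null here and the card mark does not need its own null test.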
6490       Register card = locations->GetTemp(1).AsRegister<Register>();
6491       codegen_->MarkGCCard(
6492           temp, card, array, value.AsRegister<Register>(), /* value_can_be_null= */ false);
6493 
6494       if (can_value_be_null) {
6495         DCHECK(do_store.IsLinked());
6496         __ Bind(&do_store);
6497       }
6498 
6499       Register source = register_value;
6500       if (kPoisonHeapReferences) {
6501         __ movl(temp, register_value);
6502         __ PoisonHeapReference(temp);
6503         source = temp;
6504       }
6505 
6506       __ movl(address, source);
6507 
6508       if (can_value_be_null || !needs_type_check) {
6509         codegen_->MaybeRecordImplicitNullCheck(instruction);
6510       }
6511 
6512       if (slow_path != nullptr) {
6513         __ Bind(slow_path->GetExitLabel());
6514       }
6515 
6516       break;
6517     }
6518 
6519     case DataType::Type::kInt32: {
6520       uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
6521       Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
6522       if (value.IsRegister()) {
6523         __ movl(address, value.AsRegister<Register>());
6524       } else {
6525         DCHECK(value.IsConstant()) << value;
6526         int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
6527         __ movl(address, Immediate(v));
6528       }
6529       codegen_->MaybeRecordImplicitNullCheck(instruction);
6530       break;
6531     }
6532 
6533     case DataType::Type::kInt64: {
6534       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
6535       if (value.IsRegisterPair()) {
6536         __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset),
6537                 value.AsRegisterPairLow<Register>());
6538         codegen_->MaybeRecordImplicitNullCheck(instruction);
6539         __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset + kX86WordSize),
6540                 value.AsRegisterPairHigh<Register>());
6541       } else {
6542         DCHECK(value.IsConstant());
6543         int64_t val = value.GetConstant()->AsLongConstant()->GetValue();
6544         __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset),
6545                 Immediate(Low32Bits(val)));
6546         codegen_->MaybeRecordImplicitNullCheck(instruction);
6547         __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset + kX86WordSize),
6548                 Immediate(High32Bits(val)));
6549       }
6550       break;
6551     }
6552 
6553     case DataType::Type::kFloat32: {
6554       uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
6555       Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
6556       if (value.IsFpuRegister()) {
6557         __ movss(address, value.AsFpuRegister<XmmRegister>());
6558       } else {
6559         DCHECK(value.IsConstant());
6560         int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
6561         __ movl(address, Immediate(v));
6562       }
6563       codegen_->MaybeRecordImplicitNullCheck(instruction);
6564       break;
6565     }
6566 
6567     case DataType::Type::kFloat64: {
6568       uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
6569       Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, offset);
6570       if (value.IsFpuRegister()) {
6571         __ movsd(address, value.AsFpuRegister<XmmRegister>());
6572       } else {
6573         DCHECK(value.IsConstant());
6574         Address address_hi =
6575             CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, offset + kX86WordSize);
6576         int64_t v = bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
6577         __ movl(address, Immediate(Low32Bits(v)));
6578         codegen_->MaybeRecordImplicitNullCheck(instruction);
6579         __ movl(address_hi, Immediate(High32Bits(v)));
6580       }
6581       break;
6582     }
6583 
6584     case DataType::Type::kUint32:
6585     case DataType::Type::kUint64:
6586     case DataType::Type::kVoid:
6587       LOG(FATAL) << "Unreachable type " << instruction->GetType();
6588       UNREACHABLE();
6589   }
6590 }
6591 
6592 void LocationsBuilderX86::VisitArrayLength(HArrayLength* instruction) {
6593   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
6594   locations->SetInAt(0, Location::RequiresRegister());
6595   if (!instruction->IsEmittedAtUseSite()) {
6596     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6597   }
6598 }
6599 
6600 void InstructionCodeGeneratorX86::VisitArrayLength(HArrayLength* instruction) {
6601   if (instruction->IsEmittedAtUseSite()) {
6602     return;
6603   }
6604 
6605   LocationSummary* locations = instruction->GetLocations();
6606   uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
6607   Register obj = locations->InAt(0).AsRegister<Register>();
6608   Register out = locations->Out().AsRegister<Register>();
6609   __ movl(out, Address(obj, offset));
6610   codegen_->MaybeRecordImplicitNullCheck(instruction);
6611   // For a String's length, shift out the compression flag stored in the least significant bit of the count field.
6612   if (mirror::kUseStringCompression && instruction->IsStringLength()) {
6613     __ shrl(out, Immediate(1));
6614   }
6615 }
6616 
6617 void LocationsBuilderX86::VisitBoundsCheck(HBoundsCheck* instruction) {
6618   RegisterSet caller_saves = RegisterSet::Empty();
6619   InvokeRuntimeCallingConvention calling_convention;
6620   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6621   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
6622   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
6623   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
6624   HInstruction* length = instruction->InputAt(1);
6625   if (!length->IsEmittedAtUseSite()) {
6626     locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6627   }
6628   // Need a temporary register to load the array's length when checking a compressed-string charAt.
6629   if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
6630     locations->AddTemp(Location::RequiresRegister());
6631   }
6632 }
6633 
6634 void InstructionCodeGeneratorX86::VisitBoundsCheck(HBoundsCheck* instruction) {
6635   const bool is_string_compressed_char_at =
6636       mirror::kUseStringCompression && instruction->IsStringCharAt();
6637   LocationSummary* locations = instruction->GetLocations();
6638   Location index_loc = locations->InAt(0);
6639   Location length_loc = locations->InAt(1);
6640   SlowPathCode* slow_path =
6641     new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathX86(instruction);
6642 
6643   if (length_loc.IsConstant()) {
6644     int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
6645     if (index_loc.IsConstant()) {
6646       // BCE will remove the bounds check if we are guaranteed to pass.
6647       int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
6648       if (index < 0 || index >= length) {
6649         codegen_->AddSlowPath(slow_path);
6650         __ jmp(slow_path->GetEntryLabel());
6651       } else {
6652         // Some optimization after BCE may have generated this, and we should not
6653         // generate a bounds check if it is a valid range.
6654       }
6655       return;
6656     }
6657 
6658     // The length is the constant operand, so the compare operands are swapped (index vs. length) and the jump condition is reversed accordingly.
6659     Register index_reg = index_loc.AsRegister<Register>();
6660     __ cmpl(index_reg, Immediate(length));
6661     codegen_->AddSlowPath(slow_path);
6662     __ j(kAboveEqual, slow_path->GetEntryLabel());
6663   } else {
6664     HInstruction* array_length = instruction->InputAt(1);
6665     if (array_length->IsEmittedAtUseSite()) {
6666       // Address the length field in the array.
6667       DCHECK(array_length->IsArrayLength());
6668       uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
6669       Location array_loc = array_length->GetLocations()->InAt(0);
6670       Address array_len(array_loc.AsRegister<Register>(), len_offset);
6671       if (is_string_compressed_char_at) {
6672         // TODO: if index_loc.IsConstant(), compare twice the index (to compensate for
6673         // the string compression flag) with the in-memory length and avoid the temporary.
6674         Register length_reg = locations->GetTemp(0).AsRegister<Register>();
6675         __ movl(length_reg, array_len);
6676         codegen_->MaybeRecordImplicitNullCheck(array_length);
6677         __ shrl(length_reg, Immediate(1));
6678         codegen_->GenerateIntCompare(length_reg, index_loc);
6679       } else {
6680         // General case: compare the index directly against the in-memory length
6681         // (a plain array, or a string's array with compression disabled).
6682         if (index_loc.IsConstant()) {
6683           int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
6684           __ cmpl(array_len, Immediate(value));
6685         } else {
6686           __ cmpl(array_len, index_loc.AsRegister<Register>());
6687         }
6688         codegen_->MaybeRecordImplicitNullCheck(array_length);
6689       }
6690     } else {
6691       codegen_->GenerateIntCompare(length_loc, index_loc);
6692     }
6693     codegen_->AddSlowPath(slow_path);
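    // The compare computed length - index; an unsigned below-or-equal means index >= length.
    // Negative indices appear as large unsigned values and are caught by the same branch.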
6694     __ j(kBelowEqual, slow_path->GetEntryLabel());
6695   }
6696 }
6697 
6698 void LocationsBuilderX86::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
6699   LOG(FATAL) << "Unreachable";
6700 }
6701 
6702 void InstructionCodeGeneratorX86::VisitParallelMove(HParallelMove* instruction) {
6703   if (instruction->GetNext()->IsSuspendCheck() &&
6704       instruction->GetBlock()->GetLoopInformation() != nullptr) {
6705     HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
6706     // The back edge will generate the suspend check.
6707     codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
6708   }
6709 
6710   codegen_->GetMoveResolver()->EmitNativeCode(instruction);
6711 }
6712 
6713 void LocationsBuilderX86::VisitSuspendCheck(HSuspendCheck* instruction) {
6714   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6715       instruction, LocationSummary::kCallOnSlowPath);
6716   // In suspend check slow path, usually there are no caller-save registers at all.
6717   // If SIMD instructions are present, however, we force spilling all live SIMD
6718   // registers in full width (since the runtime only saves/restores lower part).
6719   locations->SetCustomSlowPathCallerSaves(
6720       GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
6721 }
6722 
6723 void InstructionCodeGeneratorX86::VisitSuspendCheck(HSuspendCheck* instruction) {
6724   HBasicBlock* block = instruction->GetBlock();
6725   if (block->GetLoopInformation() != nullptr) {
6726     DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
6727     // The back edge will generate the suspend check.
6728     return;
6729   }
6730   if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
6731     // The goto will generate the suspend check.
6732     return;
6733   }
6734   GenerateSuspendCheck(instruction, nullptr);
6735 }
6736 
6737 void InstructionCodeGeneratorX86::GenerateSuspendCheck(HSuspendCheck* instruction,
6738                                                        HBasicBlock* successor) {
6739   SuspendCheckSlowPathX86* slow_path =
6740       down_cast<SuspendCheckSlowPathX86*>(instruction->GetSlowPath());
6741   if (slow_path == nullptr) {
6742     slow_path =
6743         new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathX86(instruction, successor);
6744     instruction->SetSlowPath(slow_path);
6745     codegen_->AddSlowPath(slow_path);
6746     if (successor != nullptr) {
6747       DCHECK(successor->IsLoopHeader());
6748     }
6749   } else {
6750     DCHECK_EQ(slow_path->GetSuccessor(), successor);
6751   }
6752 
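  // Poll the thread's flag word: a pending suspend or checkpoint request leaves it non-zero
  // and routes execution through the suspend-check slow path.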
6753   __ fs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86PointerSize>().Int32Value()),
6754                 Immediate(0));
6755   if (successor == nullptr) {
6756     __ j(kNotEqual, slow_path->GetEntryLabel());
6757     __ Bind(slow_path->GetReturnLabel());
6758   } else {
6759     __ j(kEqual, codegen_->GetLabelOf(successor));
6760     __ jmp(slow_path->GetEntryLabel());
6761   }
6762 }
6763 
6764 X86Assembler* ParallelMoveResolverX86::GetAssembler() const {
6765   return codegen_->GetAssembler();
6766 }
6767 
6768 void ParallelMoveResolverX86::MoveMemoryToMemory(int dst, int src, int number_of_words) {
6769   ScratchRegisterScope ensure_scratch(
6770       this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
6771   Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
6772   int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
6773 
6774   // Now that a temp register is available (possibly spilled), copy the memory block word by word.
6775   for (int i = 0; i < number_of_words; i++) {
6776     __ movl(temp_reg, Address(ESP, src + stack_offset));
6777     __ movl(Address(ESP, dst + stack_offset), temp_reg);
6778     stack_offset += kX86WordSize;
6779   }
6780 }
6781 
6782 void ParallelMoveResolverX86::EmitMove(size_t index) {
6783   MoveOperands* move = moves_[index];
6784   Location source = move->GetSource();
6785   Location destination = move->GetDestination();
6786 
6787   if (source.IsRegister()) {
6788     if (destination.IsRegister()) {
6789       __ movl(destination.AsRegister<Register>(), source.AsRegister<Register>());
6790     } else if (destination.IsFpuRegister()) {
6791       __ movd(destination.AsFpuRegister<XmmRegister>(), source.AsRegister<Register>());
6792     } else {
6793       DCHECK(destination.IsStackSlot());
6794       __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegister<Register>());
6795     }
6796   } else if (source.IsRegisterPair()) {
6797     if (destination.IsRegisterPair()) {
6798       __ movl(destination.AsRegisterPairLow<Register>(), source.AsRegisterPairLow<Register>());
6799       DCHECK_NE(destination.AsRegisterPairLow<Register>(), source.AsRegisterPairHigh<Register>());
6800       __ movl(destination.AsRegisterPairHigh<Register>(), source.AsRegisterPairHigh<Register>());
6801     } else if (destination.IsFpuRegister()) {
6802       size_t elem_size = DataType::Size(DataType::Type::kInt32);
6803       // Push the 2 source registers to the stack.
6804       __ pushl(source.AsRegisterPairHigh<Register>());
6805       __ cfi().AdjustCFAOffset(elem_size);
6806       __ pushl(source.AsRegisterPairLow<Register>());
6807       __ cfi().AdjustCFAOffset(elem_size);
6808       // Load the destination register.
6809       __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
6810       // And remove the temporary stack space we allocated.
6811       codegen_->DecreaseFrame(2 * elem_size);
6812     } else {
6813       DCHECK(destination.IsDoubleStackSlot());
6814       __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegisterPairLow<Register>());
6815       __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
6816               source.AsRegisterPairHigh<Register>());
6817     }
6818   } else if (source.IsFpuRegister()) {
6819     if (destination.IsRegister()) {
6820       __ movd(destination.AsRegister<Register>(), source.AsFpuRegister<XmmRegister>());
6821     } else if (destination.IsFpuRegister()) {
6822       __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
6823     } else if (destination.IsRegisterPair()) {
6824       size_t elem_size = DataType::Size(DataType::Type::kInt32);
6825       // Create stack space for 2 elements.
6826       codegen_->IncreaseFrame(2 * elem_size);
6827       // Store the source register.
6828       __ movsd(Address(ESP, 0), source.AsFpuRegister<XmmRegister>());
6829       // And pop the values into destination registers.
6830       __ popl(destination.AsRegisterPairLow<Register>());
6831       __ cfi().AdjustCFAOffset(-elem_size);
6832       __ popl(destination.AsRegisterPairHigh<Register>());
6833       __ cfi().AdjustCFAOffset(-elem_size);
6834     } else if (destination.IsStackSlot()) {
6835       __ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
6836     } else if (destination.IsDoubleStackSlot()) {
6837       __ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
6838     } else {
6839       DCHECK(destination.IsSIMDStackSlot());
6840       __ movups(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
6841     }
6842   } else if (source.IsStackSlot()) {
6843     if (destination.IsRegister()) {
6844       __ movl(destination.AsRegister<Register>(), Address(ESP, source.GetStackIndex()));
6845     } else if (destination.IsFpuRegister()) {
6846       __ movss(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
6847     } else {
6848       DCHECK(destination.IsStackSlot());
6849       MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 1);
6850     }
6851   } else if (source.IsDoubleStackSlot()) {
6852     if (destination.IsRegisterPair()) {
6853       __ movl(destination.AsRegisterPairLow<Register>(), Address(ESP, source.GetStackIndex()));
6854       __ movl(destination.AsRegisterPairHigh<Register>(),
6855               Address(ESP, source.GetHighStackIndex(kX86WordSize)));
6856     } else if (destination.IsFpuRegister()) {
6857       __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
6858     } else {
6859       DCHECK(destination.IsDoubleStackSlot()) << destination;
6860       MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 2);
6861     }
6862   } else if (source.IsSIMDStackSlot()) {
6863     if (destination.IsFpuRegister()) {
6864       __ movups(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
6865     } else {
6866       DCHECK(destination.IsSIMDStackSlot());
6867       MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 4);
6868     }
6869   } else if (source.IsConstant()) {
6870     HConstant* constant = source.GetConstant();
6871     if (constant->IsIntConstant() || constant->IsNullConstant()) {
6872       int32_t value = CodeGenerator::GetInt32ValueOf(constant);
6873       if (destination.IsRegister()) {
6874         if (value == 0) {
6875           __ xorl(destination.AsRegister<Register>(), destination.AsRegister<Register>());
6876         } else {
6877           __ movl(destination.AsRegister<Register>(), Immediate(value));
6878         }
6879       } else {
6880         DCHECK(destination.IsStackSlot()) << destination;
6881         __ movl(Address(ESP, destination.GetStackIndex()), Immediate(value));
6882       }
6883     } else if (constant->IsFloatConstant()) {
6884       float fp_value = constant->AsFloatConstant()->GetValue();
6885       int32_t value = bit_cast<int32_t, float>(fp_value);
6886       Immediate imm(value);
6887       if (destination.IsFpuRegister()) {
6888         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
6889         if (value == 0) {
6890           // Easy handling of 0.0.
6891           __ xorps(dest, dest);
6892         } else {
6893           ScratchRegisterScope ensure_scratch(
6894               this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
6895           Register temp = static_cast<Register>(ensure_scratch.GetRegister());
6896           __ movl(temp, Immediate(value));
6897           __ movd(dest, temp);
6898         }
6899       } else {
6900         DCHECK(destination.IsStackSlot()) << destination;
6901         __ movl(Address(ESP, destination.GetStackIndex()), imm);
6902       }
6903     } else if (constant->IsLongConstant()) {
6904       int64_t value = constant->AsLongConstant()->GetValue();
6905       int32_t low_value = Low32Bits(value);
6906       int32_t high_value = High32Bits(value);
6907       Immediate low(low_value);
6908       Immediate high(high_value);
6909       if (destination.IsDoubleStackSlot()) {
6910         __ movl(Address(ESP, destination.GetStackIndex()), low);
6911         __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), high);
6912       } else {
6913         __ movl(destination.AsRegisterPairLow<Register>(), low);
6914         __ movl(destination.AsRegisterPairHigh<Register>(), high);
6915       }
6916     } else {
6917       DCHECK(constant->IsDoubleConstant());
6918       double dbl_value = constant->AsDoubleConstant()->GetValue();
6919       int64_t value = bit_cast<int64_t, double>(dbl_value);
6920       int32_t low_value = Low32Bits(value);
6921       int32_t high_value = High32Bits(value);
6922       Immediate low(low_value);
6923       Immediate high(high_value);
6924       if (destination.IsFpuRegister()) {
6925         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
6926         if (value == 0) {
6927           // Easy handling of 0.0.
6928           __ xorpd(dest, dest);
6929         } else {
6930           __ pushl(high);
6931           __ cfi().AdjustCFAOffset(4);
6932           __ pushl(low);
6933           __ cfi().AdjustCFAOffset(4);
6934           __ movsd(dest, Address(ESP, 0));
6935           codegen_->DecreaseFrame(8);
6936         }
6937       } else {
6938         DCHECK(destination.IsDoubleStackSlot()) << destination;
6939         __ movl(Address(ESP, destination.GetStackIndex()), low);
6940         __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), high);
6941       }
6942     }
6943   } else {
6944     LOG(FATAL) << "Unimplemented move: " << destination << " <- " << source;
6945   }
6946 }
6947 
6948 void ParallelMoveResolverX86::Exchange(Register reg, int mem) {
6949   Register suggested_scratch = reg == EAX ? EBX : EAX;
6950   ScratchRegisterScope ensure_scratch(
6951       this, reg, suggested_scratch, codegen_->GetNumberOfCoreRegisters());
6952 
6953   int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
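  // Swap `reg` with the stack slot: load the slot into the scratch register, store `reg`
  // into the slot, then copy the scratch value into `reg`. If the scratch register had to
  // be spilled, `stack_offset` compensates for the extra push.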
6954   __ movl(static_cast<Register>(ensure_scratch.GetRegister()), Address(ESP, mem + stack_offset));
6955   __ movl(Address(ESP, mem + stack_offset), reg);
6956   __ movl(reg, static_cast<Register>(ensure_scratch.GetRegister()));
6957 }
6958 
6959 void ParallelMoveResolverX86::Exchange32(XmmRegister reg, int mem) {
6960   ScratchRegisterScope ensure_scratch(
6961       this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
6962 
6963   Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
6964   int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
6965   __ movl(temp_reg, Address(ESP, mem + stack_offset));
6966   __ movss(Address(ESP, mem + stack_offset), reg);
6967   __ movd(reg, temp_reg);
6968 }
6969 
6970 void ParallelMoveResolverX86::Exchange128(XmmRegister reg, int mem) {
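  // Swap a 128-bit XMM register with a SIMD stack slot: spill the register to a fresh
  // 16-byte area at the top of the stack, exchange that area with the slot word by word,
  // then reload the register from it.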
6971   size_t extra_slot = 4 * kX86WordSize;
6972   codegen_->IncreaseFrame(extra_slot);
6973   __ movups(Address(ESP, 0), XmmRegister(reg));
6974   ExchangeMemory(0, mem + extra_slot, 4);
6975   __ movups(XmmRegister(reg), Address(ESP, 0));
6976   codegen_->DecreaseFrame(extra_slot);
6977 }
6978 
6979 void ParallelMoveResolverX86::ExchangeMemory(int mem1, int mem2, int number_of_words) {
6980   ScratchRegisterScope ensure_scratch1(
6981       this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
6982 
6983   Register suggested_scratch = ensure_scratch1.GetRegister() == EAX ? EBX : EAX;
6984   ScratchRegisterScope ensure_scratch2(
6985       this, ensure_scratch1.GetRegister(), suggested_scratch, codegen_->GetNumberOfCoreRegisters());
6986 
6987   int stack_offset = ensure_scratch1.IsSpilled() ? kX86WordSize : 0;
6988   stack_offset += ensure_scratch2.IsSpilled() ? kX86WordSize : 0;
6989 
6990   // Now that temp registers are available (possibly spilled), exchange blocks of memory.
6991   for (int i = 0; i < number_of_words; i++) {
6992     __ movl(static_cast<Register>(ensure_scratch1.GetRegister()), Address(ESP, mem1 + stack_offset));
6993     __ movl(static_cast<Register>(ensure_scratch2.GetRegister()), Address(ESP, mem2 + stack_offset));
6994     __ movl(Address(ESP, mem2 + stack_offset), static_cast<Register>(ensure_scratch1.GetRegister()));
6995     __ movl(Address(ESP, mem1 + stack_offset), static_cast<Register>(ensure_scratch2.GetRegister()));
6996     stack_offset += kX86WordSize;
6997   }
6998 }
6999 
7000 void ParallelMoveResolverX86::EmitSwap(size_t index) {
7001   MoveOperands* move = moves_[index];
7002   Location source = move->GetSource();
7003   Location destination = move->GetDestination();
7004 
7005   if (source.IsRegister() && destination.IsRegister()) {
7006     // Use the XOR swap algorithm to avoid using an XCHG instruction or a temporary register.
7007     DCHECK_NE(destination.AsRegister<Register>(), source.AsRegister<Register>());
7008     __ xorl(destination.AsRegister<Register>(), source.AsRegister<Register>());
7009     __ xorl(source.AsRegister<Register>(), destination.AsRegister<Register>());
7010     __ xorl(destination.AsRegister<Register>(), source.AsRegister<Register>());
7011   } else if (source.IsRegister() && destination.IsStackSlot()) {
7012     Exchange(source.AsRegister<Register>(), destination.GetStackIndex());
7013   } else if (source.IsStackSlot() && destination.IsRegister()) {
7014     Exchange(destination.AsRegister<Register>(), source.GetStackIndex());
7015   } else if (source.IsStackSlot() && destination.IsStackSlot()) {
7016     ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 1);
7017   } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
7018     // Use XOR Swap algorithm to avoid a temporary.
7019     DCHECK_NE(source.reg(), destination.reg());
7020     __ xorpd(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
7021     __ xorpd(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>());
7022     __ xorpd(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
7023   } else if (source.IsFpuRegister() && destination.IsStackSlot()) {
7024     Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
7025   } else if (destination.IsFpuRegister() && source.IsStackSlot()) {
7026     Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
7027   } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) {
7028     // Take advantage of the 16 bytes in the XMM register.
7029     XmmRegister reg = source.AsFpuRegister<XmmRegister>();
7030     Address stack(ESP, destination.GetStackIndex());
7031     // Load the double into the high doubleword.
7032     __ movhpd(reg, stack);
7033 
7034     // Store the low double into the destination.
7035     __ movsd(stack, reg);
7036 
7037     // Move the high double to the low double.
7038     __ psrldq(reg, Immediate(8));
7039   } else if (destination.IsFpuRegister() && source.IsDoubleStackSlot()) {
7040     // Take advantage of the 16 bytes in the XMM register.
7041     XmmRegister reg = destination.AsFpuRegister<XmmRegister>();
7042     Address stack(ESP, source.GetStackIndex());
7043     // Load the double into the high doubleword.
7044     __ movhpd(reg, stack);
7045 
7046     // Store the low double into the destination.
7047     __ movsd(stack, reg);
7048 
7049     // Move the high double to the low double.
7050     __ psrldq(reg, Immediate(8));
7051   } else if (destination.IsDoubleStackSlot() && source.IsDoubleStackSlot()) {
7052     ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 2);
7053   } else if (source.IsSIMDStackSlot() && destination.IsSIMDStackSlot()) {
7054     ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 4);
7055   } else if (source.IsFpuRegister() && destination.IsSIMDStackSlot()) {
7056     Exchange128(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
7057   } else if (destination.IsFpuRegister() && source.IsSIMDStackSlot()) {
7058     Exchange128(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
7059   } else {
7060     LOG(FATAL) << "Unimplemented: source: " << source << ", destination: " << destination;
7061   }
7062 }
7063 
7064 void ParallelMoveResolverX86::SpillScratch(int reg) {
7065   __ pushl(static_cast<Register>(reg));
7066 }
7067 
7068 void ParallelMoveResolverX86::RestoreScratch(int reg) {
7069   __ popl(static_cast<Register>(reg));
7070 }
7071 
7072 HLoadClass::LoadKind CodeGeneratorX86::GetSupportedLoadClassKind(
7073     HLoadClass::LoadKind desired_class_load_kind) {
7074   switch (desired_class_load_kind) {
7075     case HLoadClass::LoadKind::kInvalid:
7076       LOG(FATAL) << "UNREACHABLE";
7077       UNREACHABLE();
7078     case HLoadClass::LoadKind::kReferrersClass:
7079       break;
7080     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
7081     case HLoadClass::LoadKind::kBootImageRelRo:
7082     case HLoadClass::LoadKind::kBssEntry:
7083     case HLoadClass::LoadKind::kBssEntryPublic:
7084     case HLoadClass::LoadKind::kBssEntryPackage:
7085       DCHECK(!GetCompilerOptions().IsJitCompiler());
7086       break;
7087     case HLoadClass::LoadKind::kJitBootImageAddress:
7088     case HLoadClass::LoadKind::kJitTableAddress:
7089       DCHECK(GetCompilerOptions().IsJitCompiler());
7090       break;
7091     case HLoadClass::LoadKind::kRuntimeCall:
7092       break;
7093   }
7094   return desired_class_load_kind;
7095 }
7096 
7097 void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) {
7098   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
7099   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
7100     InvokeRuntimeCallingConvention calling_convention;
7101     CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
7102         cls,
7103         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
7104         Location::RegisterLocation(EAX));
7105     DCHECK_EQ(calling_convention.GetRegisterAt(0), EAX);
7106     return;
7107   }
7108   DCHECK_EQ(cls->NeedsAccessCheck(),
7109             load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
7110                 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
7111 
7112   const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
7113   LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
7114       ? LocationSummary::kCallOnSlowPath
7115       : LocationSummary::kNoCall;
7116   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind);
7117   if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
7118     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
7119   }
7120 
7121   if (load_kind == HLoadClass::LoadKind::kReferrersClass || cls->HasPcRelativeLoadKind()) {
7122     locations->SetInAt(0, Location::RequiresRegister());
7123   }
7124   locations->SetOut(Location::RequiresRegister());
7125   if (call_kind == LocationSummary::kCallOnSlowPath && cls->HasPcRelativeLoadKind()) {
7126     if (!kUseReadBarrier || kUseBakerReadBarrier) {
7127       // Rely on the type resolution and/or initialization to save everything.
7128       locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
7129     } else {
7130       // For non-Baker read barrier we have a temp-clobbering call.
7131     }
7132   }
7133 }
7134 
7135 Label* CodeGeneratorX86::NewJitRootClassPatch(const DexFile& dex_file,
7136                                               dex::TypeIndex type_index,
7137                                               Handle<mirror::Class> handle) {
7138   ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
7139   // Add a patch entry and return the label.
7140   jit_class_patches_.emplace_back(&dex_file, type_index.index_);
7141   PatchInfo<Label>* info = &jit_class_patches_.back();
7142   return &info->label;
7143 }
7144 
7145 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
7146 // move.
7147 void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
7148   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
7149   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
7150     codegen_->GenerateLoadClassRuntimeCall(cls);
7151     return;
7152   }
7153   DCHECK_EQ(cls->NeedsAccessCheck(),
7154             load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
7155                 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
7156 
7157   LocationSummary* locations = cls->GetLocations();
7158   Location out_loc = locations->Out();
7159   Register out = out_loc.AsRegister<Register>();
7160 
7161   bool generate_null_check = false;
7162   const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
7163       ? kWithoutReadBarrier
7164       : kCompilerReadBarrierOption;
7165   switch (load_kind) {
7166     case HLoadClass::LoadKind::kReferrersClass: {
7167       DCHECK(!cls->CanCallRuntime());
7168       DCHECK(!cls->MustGenerateClinitCheck());
7169       // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
7170       Register current_method = locations->InAt(0).AsRegister<Register>();
7171       GenerateGcRootFieldLoad(
7172           cls,
7173           out_loc,
7174           Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()),
7175           /* fixup_label= */ nullptr,
7176           read_barrier_option);
7177       break;
7178     }
7179     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
7180       DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
7181              codegen_->GetCompilerOptions().IsBootImageExtension());
7182       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
7183       Register method_address = locations->InAt(0).AsRegister<Register>();
7184       __ leal(out, Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset));
7185       codegen_->RecordBootImageTypePatch(cls);
7186       break;
7187     }
7188     case HLoadClass::LoadKind::kBootImageRelRo: {
7189       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
7190       Register method_address = locations->InAt(0).AsRegister<Register>();
7191       __ movl(out, Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset));
7192       codegen_->RecordBootImageRelRoPatch(cls->InputAt(0)->AsX86ComputeBaseMethodAddress(),
7193                                           CodeGenerator::GetBootImageOffset(cls));
7194       break;
7195     }
7196     case HLoadClass::LoadKind::kBssEntry:
7197     case HLoadClass::LoadKind::kBssEntryPublic:
7198     case HLoadClass::LoadKind::kBssEntryPackage: {
7199       Register method_address = locations->InAt(0).AsRegister<Register>();
7200       Address address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset);
7201       Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls);
7202       GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
7203       // No need for memory fence, thanks to the x86 memory model.
7204       generate_null_check = true;
7205       break;
7206     }
7207     case HLoadClass::LoadKind::kJitBootImageAddress: {
7208       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
7209       uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get());
7210       DCHECK_NE(address, 0u);
7211       __ movl(out, Immediate(address));
7212       break;
7213     }
7214     case HLoadClass::LoadKind::kJitTableAddress: {
7215       Address address = Address::Absolute(CodeGeneratorX86::kPlaceholder32BitOffset);
7216       Label* fixup_label = codegen_->NewJitRootClassPatch(
7217           cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass());
7218       // /* GcRoot<mirror::Class> */ out = *address
7219       GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
7220       break;
7221     }
7222     case HLoadClass::LoadKind::kRuntimeCall:
7223     case HLoadClass::LoadKind::kInvalid:
7224       LOG(FATAL) << "UNREACHABLE";
7225       UNREACHABLE();
7226   }
7227 
7228   if (generate_null_check || cls->MustGenerateClinitCheck()) {
7229     DCHECK(cls->CanCallRuntime());
7230     SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86(cls, cls);
7231     codegen_->AddSlowPath(slow_path);
7232 
7233     if (generate_null_check) {
7234       __ testl(out, out);
7235       __ j(kEqual, slow_path->GetEntryLabel());
7236     }
7237 
7238     if (cls->MustGenerateClinitCheck()) {
7239       GenerateClassInitializationCheck(slow_path, out);
7240     } else {
7241       __ Bind(slow_path->GetExitLabel());
7242     }
7243   }
7244 }
7245 
7246 void LocationsBuilderX86::VisitLoadMethodHandle(HLoadMethodHandle* load) {
7247   InvokeRuntimeCallingConvention calling_convention;
7248   Location location = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
7249   CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location);
7250 }
7251 
7252 void InstructionCodeGeneratorX86::VisitLoadMethodHandle(HLoadMethodHandle* load) {
7253   codegen_->GenerateLoadMethodHandleRuntimeCall(load);
7254 }
7255 
7256 void LocationsBuilderX86::VisitLoadMethodType(HLoadMethodType* load) {
7257   InvokeRuntimeCallingConvention calling_convention;
7258   Location location = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
7259   CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location);
7260 }
7261 
7262 void InstructionCodeGeneratorX86::VisitLoadMethodType(HLoadMethodType* load) {
7263   codegen_->GenerateLoadMethodTypeRuntimeCall(load);
7264 }
7265 
7266 void LocationsBuilderX86::VisitClinitCheck(HClinitCheck* check) {
7267   LocationSummary* locations =
7268       new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
7269   locations->SetInAt(0, Location::RequiresRegister());
7270   if (check->HasUses()) {
7271     locations->SetOut(Location::SameAsFirstInput());
7272   }
7273   // Rely on the type initialization to save everything we need.
7274   locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
7275 }
7276 
7277 void InstructionCodeGeneratorX86::VisitClinitCheck(HClinitCheck* check) {
7278   // We assume the class is not null.
7279   SlowPathCode* slow_path =
7280       new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86(check->GetLoadClass(), check);
7281   codegen_->AddSlowPath(slow_path);
7282   GenerateClassInitializationCheck(slow_path,
7283                                    check->GetLocations()->InAt(0).AsRegister<Register>());
7284 }
7285 
7286 void InstructionCodeGeneratorX86::GenerateClassInitializationCheck(
7287     SlowPathCode* slow_path, Register class_reg) {
7288   constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
7289   const size_t status_byte_offset =
7290       mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
7291   constexpr uint32_t shifted_visibly_initialized_value =
7292       enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte);
7293 
7294   __ cmpb(Address(class_reg, status_byte_offset), Immediate(shifted_visibly_initialized_value));
7295   __ j(kBelow, slow_path->GetEntryLabel());
7296   __ Bind(slow_path->GetExitLabel());
7297 }
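// Sketch of the emitted check (exact offsets depend on the SubtypeCheckBits layout):
//   cmpb $<shifted kVisiblyInitialized>, status_byte(%class_reg)
//   jb   <class initialization slow path>
//   <slow path exit label>
// The unsigned kBelow branch relies on ClassStatus::kVisiblyInitialized being the highest
// status value, so any earlier state diverts to the slow path.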
7298 
7299 void InstructionCodeGeneratorX86::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
7300                                                                     Register temp) {
7301   uint32_t path_to_root = check->GetBitstringPathToRoot();
7302   uint32_t mask = check->GetBitstringMask();
7303   DCHECK(IsPowerOfTwo(mask + 1));
7304   size_t mask_bits = WhichPowerOf2(mask + 1);
7305 
7306   if (mask_bits == 16u) {
7307     // Compare the bitstring in memory.
7308     __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root));
7309   } else {
7310     // /* uint32_t */ temp = temp->status_
7311     __ movl(temp, Address(temp, mirror::Class::StatusOffset()));
7312     // Compare the bitstring bits using SUB.
7313     __ subl(temp, Immediate(path_to_root));
7314     // Shift out bits that do not contribute to the comparison.
7315     __ shll(temp, Immediate(32u - mask_bits));
7316   }
7317 }
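// Note: this helper only sets the flags (via cmpw in the 16-bit case, or the subl/shll
// sequence otherwise); callers branch on them afterwards with j(kNotEqual, ...). See the
// kBitstringCheck cases in VisitInstanceOf and VisitCheckCast below.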
7318 
7319 HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind(
7320     HLoadString::LoadKind desired_string_load_kind) {
7321   switch (desired_string_load_kind) {
7322     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
7323     case HLoadString::LoadKind::kBootImageRelRo:
7324     case HLoadString::LoadKind::kBssEntry:
7325       DCHECK(!GetCompilerOptions().IsJitCompiler());
7326       break;
7327     case HLoadString::LoadKind::kJitBootImageAddress:
7328     case HLoadString::LoadKind::kJitTableAddress:
7329       DCHECK(GetCompilerOptions().IsJitCompiler());
7330       break;
7331     case HLoadString::LoadKind::kRuntimeCall:
7332       break;
7333   }
7334   return desired_string_load_kind;
7335 }
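// In short: the PC-relative boot image, boot image RelRo and .bss kinds are AOT-only, the
// JIT boot image address and JIT table kinds are JIT-only, and kRuntimeCall is accepted as
// the generic fallback.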
7336 
7337 void LocationsBuilderX86::VisitLoadString(HLoadString* load) {
7338   LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
7339   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
7340   HLoadString::LoadKind load_kind = load->GetLoadKind();
7341   if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative ||
7342       load_kind == HLoadString::LoadKind::kBootImageRelRo ||
7343       load_kind == HLoadString::LoadKind::kBssEntry) {
7344     locations->SetInAt(0, Location::RequiresRegister());
7345   }
7346   if (load_kind == HLoadString::LoadKind::kRuntimeCall) {
7347     locations->SetOut(Location::RegisterLocation(EAX));
7348   } else {
7349     locations->SetOut(Location::RequiresRegister());
7350     if (load_kind == HLoadString::LoadKind::kBssEntry) {
7351       if (!kUseReadBarrier || kUseBakerReadBarrier) {
7352         // Rely on the pResolveString to save everything.
7353         locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
7354       } else {
7355         // For non-Baker read barrier we have a temp-clobbering call.
7356       }
7357     }
7358   }
7359 }
7360 
7361 Label* CodeGeneratorX86::NewJitRootStringPatch(const DexFile& dex_file,
7362                                                dex::StringIndex string_index,
7363                                                Handle<mirror::String> handle) {
7364   ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
7365   // Add a patch entry and return the label.
7366   jit_string_patches_.emplace_back(&dex_file, string_index.index_);
7367   PatchInfo<Label>* info = &jit_string_patches_.back();
7368   return &info->label;
7369 }
7370 
7371 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
7372 // move.
7373 void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
7374   LocationSummary* locations = load->GetLocations();
7375   Location out_loc = locations->Out();
7376   Register out = out_loc.AsRegister<Register>();
7377 
7378   switch (load->GetLoadKind()) {
7379     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
7380       DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
7381              codegen_->GetCompilerOptions().IsBootImageExtension());
7382       Register method_address = locations->InAt(0).AsRegister<Register>();
7383       __ leal(out, Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset));
7384       codegen_->RecordBootImageStringPatch(load);
7385       return;
7386     }
7387     case HLoadString::LoadKind::kBootImageRelRo: {
7388       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
7389       Register method_address = locations->InAt(0).AsRegister<Register>();
7390       __ movl(out, Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset));
7391       codegen_->RecordBootImageRelRoPatch(load->InputAt(0)->AsX86ComputeBaseMethodAddress(),
7392                                           CodeGenerator::GetBootImageOffset(load));
7393       return;
7394     }
7395     case HLoadString::LoadKind::kBssEntry: {
7396       Register method_address = locations->InAt(0).AsRegister<Register>();
7397       Address address = Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset);
7398       Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
7399       // /* GcRoot<mirror::String> */ out = *address  /* PC-relative */
7400       GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
7401       // No need for memory fence, thanks to the x86 memory model.
7402       SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86(load);
7403       codegen_->AddSlowPath(slow_path);
7404       __ testl(out, out);
7405       __ j(kEqual, slow_path->GetEntryLabel());
7406       __ Bind(slow_path->GetExitLabel());
7407       return;
7408     }
7409     case HLoadString::LoadKind::kJitBootImageAddress: {
7410       uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get());
7411       DCHECK_NE(address, 0u);
7412       __ movl(out, Immediate(address));
7413       return;
7414     }
7415     case HLoadString::LoadKind::kJitTableAddress: {
7416       Address address = Address::Absolute(CodeGeneratorX86::kPlaceholder32BitOffset);
7417       Label* fixup_label = codegen_->NewJitRootStringPatch(
7418           load->GetDexFile(), load->GetStringIndex(), load->GetString());
7419       // /* GcRoot<mirror::String> */ out = *address
7420       GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
7421       return;
7422     }
7423     default:
7424       break;
7425   }
7426 
7427   // TODO: Re-add the compiler code to do string dex cache lookup again.
7428   InvokeRuntimeCallingConvention calling_convention;
7429   DCHECK_EQ(calling_convention.GetRegisterAt(0), out);
7430   __ movl(calling_convention.GetRegisterAt(0), Immediate(load->GetStringIndex().index_));
7431   codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
7432   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
7433 }
7434 
7435 static Address GetExceptionTlsAddress() {
7436   return Address::Absolute(Thread::ExceptionOffset<kX86PointerSize>().Int32Value());
7437 }
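// On x86 the Thread object is addressed through the %fs segment register, so reading and
// clearing the pending exception below are single fs-relative movl instructions against
// this offset.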
7438 
7439 void LocationsBuilderX86::VisitLoadException(HLoadException* load) {
7440   LocationSummary* locations =
7441       new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall);
7442   locations->SetOut(Location::RequiresRegister());
7443 }
7444 
7445 void InstructionCodeGeneratorX86::VisitLoadException(HLoadException* load) {
7446   __ fs()->movl(load->GetLocations()->Out().AsRegister<Register>(), GetExceptionTlsAddress());
7447 }
7448 
7449 void LocationsBuilderX86::VisitClearException(HClearException* clear) {
7450   new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
7451 }
7452 
7453 void InstructionCodeGeneratorX86::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
7454   __ fs()->movl(GetExceptionTlsAddress(), Immediate(0));
7455 }
7456 
7457 void LocationsBuilderX86::VisitThrow(HThrow* instruction) {
7458   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
7459       instruction, LocationSummary::kCallOnMainOnly);
7460   InvokeRuntimeCallingConvention calling_convention;
7461   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
7462 }
7463 
7464 void InstructionCodeGeneratorX86::VisitThrow(HThrow* instruction) {
7465   codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
7466   CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
7467 }
7468 
7469 // Temp is used for read barrier.
7470 static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
7471   if (kEmitCompilerReadBarrier &&
7472       !kUseBakerReadBarrier &&
7473       (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
7474        type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
7475        type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
7476     return 1;
7477   }
7478   return 0;
7479 }
7480 
7481 // Interface case has 2 temps, one for holding the number of interfaces, one for the current
7482 // interface pointer; the current interface is compared in memory.
7483 // The other checks have one temp for loading the object's class.
7484 static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
7485   if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
7486     return 2;
7487   }
7488   return 1 + NumberOfInstanceOfTemps(type_check_kind);
7489 }
7490 
7491 void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) {
7492   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
7493   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7494   bool baker_read_barrier_slow_path = false;
7495   switch (type_check_kind) {
7496     case TypeCheckKind::kExactCheck:
7497     case TypeCheckKind::kAbstractClassCheck:
7498     case TypeCheckKind::kClassHierarchyCheck:
7499     case TypeCheckKind::kArrayObjectCheck: {
7500       bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction);
7501       call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
7502       baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier;
7503       break;
7504     }
7505     case TypeCheckKind::kArrayCheck:
7506     case TypeCheckKind::kUnresolvedCheck:
7507     case TypeCheckKind::kInterfaceCheck:
7508       call_kind = LocationSummary::kCallOnSlowPath;
7509       break;
7510     case TypeCheckKind::kBitstringCheck:
7511       break;
7512   }
7513 
7514   LocationSummary* locations =
7515       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
7516   if (baker_read_barrier_slow_path) {
7517     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
7518   }
7519   locations->SetInAt(0, Location::RequiresRegister());
7520   if (type_check_kind == TypeCheckKind::kBitstringCheck) {
7521     locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
7522     locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
7523     locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
7524   } else {
7525     locations->SetInAt(1, Location::Any());
7526   }
7527   // Note that TypeCheckSlowPathX86 uses this "out" register too.
7528   locations->SetOut(Location::RequiresRegister());
7529   // When read barriers are enabled, we need a temporary register for some cases.
7530   locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
7531 }
7532 
7533 void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
7534   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7535   LocationSummary* locations = instruction->GetLocations();
7536   Location obj_loc = locations->InAt(0);
7537   Register obj = obj_loc.AsRegister<Register>();
7538   Location cls = locations->InAt(1);
7539   Location out_loc = locations->Out();
7540   Register out = out_loc.AsRegister<Register>();
7541   const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
7542   DCHECK_LE(num_temps, 1u);
7543   Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
7544   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
7545   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
7546   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
7547   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
7548   SlowPathCode* slow_path = nullptr;
7549   NearLabel done, zero;
7550 
7551   // Return 0 if `obj` is null.
7552   // Avoid null check if we know obj is not null.
7553   if (instruction->MustDoNullCheck()) {
7554     __ testl(obj, obj);
7555     __ j(kEqual, &zero);
7556   }
7557 
7558   switch (type_check_kind) {
7559     case TypeCheckKind::kExactCheck: {
7560       ReadBarrierOption read_barrier_option =
7561           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
7562       // /* HeapReference<Class> */ out = obj->klass_
7563       GenerateReferenceLoadTwoRegisters(instruction,
7564                                         out_loc,
7565                                         obj_loc,
7566                                         class_offset,
7567                                         read_barrier_option);
7568       if (cls.IsRegister()) {
7569         __ cmpl(out, cls.AsRegister<Register>());
7570       } else {
7571         DCHECK(cls.IsStackSlot()) << cls;
7572         __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7573       }
7574 
7575       // Classes must be equal for the instanceof to succeed.
7576       __ j(kNotEqual, &zero);
7577       __ movl(out, Immediate(1));
7578       __ jmp(&done);
7579       break;
7580     }
7581 
7582     case TypeCheckKind::kAbstractClassCheck: {
7583       ReadBarrierOption read_barrier_option =
7584           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
7585       // /* HeapReference<Class> */ out = obj->klass_
7586       GenerateReferenceLoadTwoRegisters(instruction,
7587                                         out_loc,
7588                                         obj_loc,
7589                                         class_offset,
7590                                         read_barrier_option);
7591       // If the class is abstract, we eagerly fetch the super class of the
7592       // object to avoid doing a comparison we know will fail.
7593       NearLabel loop;
7594       __ Bind(&loop);
7595       // /* HeapReference<Class> */ out = out->super_class_
7596       GenerateReferenceLoadOneRegister(instruction,
7597                                        out_loc,
7598                                        super_offset,
7599                                        maybe_temp_loc,
7600                                        read_barrier_option);
7601       __ testl(out, out);
7602       // If `out` is null, we use it for the result, and jump to `done`.
7603       __ j(kEqual, &done);
7604       if (cls.IsRegister()) {
7605         __ cmpl(out, cls.AsRegister<Register>());
7606       } else {
7607         DCHECK(cls.IsStackSlot()) << cls;
7608         __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7609       }
7610       __ j(kNotEqual, &loop);
7611       __ movl(out, Immediate(1));
7612       if (zero.IsLinked()) {
7613         __ jmp(&done);
7614       }
7615       break;
7616     }
7617 
7618     case TypeCheckKind::kClassHierarchyCheck: {
7619       ReadBarrierOption read_barrier_option =
7620           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
7621       // /* HeapReference<Class> */ out = obj->klass_
7622       GenerateReferenceLoadTwoRegisters(instruction,
7623                                         out_loc,
7624                                         obj_loc,
7625                                         class_offset,
7626                                         read_barrier_option);
7627       // Walk over the class hierarchy to find a match.
7628       NearLabel loop, success;
7629       __ Bind(&loop);
7630       if (cls.IsRegister()) {
7631         __ cmpl(out, cls.AsRegister<Register>());
7632       } else {
7633         DCHECK(cls.IsStackSlot()) << cls;
7634         __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7635       }
7636       __ j(kEqual, &success);
7637       // /* HeapReference<Class> */ out = out->super_class_
7638       GenerateReferenceLoadOneRegister(instruction,
7639                                        out_loc,
7640                                        super_offset,
7641                                        maybe_temp_loc,
7642                                        read_barrier_option);
7643       __ testl(out, out);
7644       __ j(kNotEqual, &loop);
7645       // If `out` is null, we use it for the result, and jump to `done`.
7646       __ jmp(&done);
7647       __ Bind(&success);
7648       __ movl(out, Immediate(1));
7649       if (zero.IsLinked()) {
7650         __ jmp(&done);
7651       }
7652       break;
7653     }
7654 
7655     case TypeCheckKind::kArrayObjectCheck: {
7656       ReadBarrierOption read_barrier_option =
7657           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
7658       // /* HeapReference<Class> */ out = obj->klass_
7659       GenerateReferenceLoadTwoRegisters(instruction,
7660                                         out_loc,
7661                                         obj_loc,
7662                                         class_offset,
7663                                         read_barrier_option);
7664       // Do an exact check.
7665       NearLabel exact_check;
7666       if (cls.IsRegister()) {
7667         __ cmpl(out, cls.AsRegister<Register>());
7668       } else {
7669         DCHECK(cls.IsStackSlot()) << cls;
7670         __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7671       }
7672       __ j(kEqual, &exact_check);
7673       // Otherwise, we need to check that the object's class is a non-primitive array.
7674       // /* HeapReference<Class> */ out = out->component_type_
7675       GenerateReferenceLoadOneRegister(instruction,
7676                                        out_loc,
7677                                        component_offset,
7678                                        maybe_temp_loc,
7679                                        read_barrier_option);
7680       __ testl(out, out);
7681       // If `out` is null, we use it for the result, and jump to `done`.
7682       __ j(kEqual, &done);
7683       __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot));
7684       __ j(kNotEqual, &zero);
7685       __ Bind(&exact_check);
7686       __ movl(out, Immediate(1));
7687       __ jmp(&done);
7688       break;
7689     }
7690 
7691     case TypeCheckKind::kArrayCheck: {
7692       // No read barrier since the slow path will retry upon failure.
7693       // /* HeapReference<Class> */ out = obj->klass_
7694       GenerateReferenceLoadTwoRegisters(instruction,
7695                                         out_loc,
7696                                         obj_loc,
7697                                         class_offset,
7698                                         kWithoutReadBarrier);
7699       if (cls.IsRegister()) {
7700         __ cmpl(out, cls.AsRegister<Register>());
7701       } else {
7702         DCHECK(cls.IsStackSlot()) << cls;
7703         __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7704       }
7705       DCHECK(locations->OnlyCallsOnSlowPath());
7706       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86(
7707           instruction, /* is_fatal= */ false);
7708       codegen_->AddSlowPath(slow_path);
7709       __ j(kNotEqual, slow_path->GetEntryLabel());
7710       __ movl(out, Immediate(1));
7711       if (zero.IsLinked()) {
7712         __ jmp(&done);
7713       }
7714       break;
7715     }
7716 
7717     case TypeCheckKind::kUnresolvedCheck:
7718     case TypeCheckKind::kInterfaceCheck: {
7719       // Note that we indeed only call on slow path, but we always go
7720       // into the slow path for the unresolved and interface check
7721       // cases.
7722       //
7723       // We cannot directly call the InstanceofNonTrivial runtime
7724       // entry point without resorting to a type checking slow path
7725       // here (i.e. by calling InvokeRuntime directly), as it would
7726       // require assigning fixed registers for the inputs of this
7727       // HInstanceOf instruction (following the runtime calling
7728       // convention), which might be cluttered by the potential first
7729       // read barrier emission at the beginning of this method.
7730       //
7731       // TODO: Introduce a new runtime entry point taking the object
7732       // to test (instead of its class) as argument, and let it deal
7733       // with the read barrier issues. This will let us refactor this
7734       // case of the `switch` code as it was previously (with a direct
7735       // call to the runtime not using a type checking slow path).
7736       // This should also be beneficial for the other cases above.
7737       DCHECK(locations->OnlyCallsOnSlowPath());
7738       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86(
7739           instruction, /* is_fatal= */ false);
7740       codegen_->AddSlowPath(slow_path);
7741       __ jmp(slow_path->GetEntryLabel());
7742       if (zero.IsLinked()) {
7743         __ jmp(&done);
7744       }
7745       break;
7746     }
7747 
7748     case TypeCheckKind::kBitstringCheck: {
7749       // /* HeapReference<Class> */ out = obj->klass_
7750       GenerateReferenceLoadTwoRegisters(instruction,
7751                                         out_loc,
7752                                         obj_loc,
7753                                         class_offset,
7754                                         kWithoutReadBarrier);
7755 
7756       GenerateBitstringTypeCheckCompare(instruction, out);
7757       __ j(kNotEqual, &zero);
7758       __ movl(out, Immediate(1));
7759       __ jmp(&done);
7760       break;
7761     }
7762   }
7763 
7764   if (zero.IsLinked()) {
7765     __ Bind(&zero);
7766     __ xorl(out, out);
7767   }
7768 
7769   if (done.IsLinked()) {
7770     __ Bind(&done);
7771   }
7772 
7773   if (slow_path != nullptr) {
7774     __ Bind(slow_path->GetExitLabel());
7775   }
7776 }
7777 
7778 void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) {
7779   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7780   LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction);
7781   LocationSummary* locations =
7782       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
7783   locations->SetInAt(0, Location::RequiresRegister());
7784   if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
7785     // Require a register for the interface check since there is a loop that compares the class to
7786     // a memory address.
7787     locations->SetInAt(1, Location::RequiresRegister());
7788   } else if (type_check_kind == TypeCheckKind::kBitstringCheck) {
7789     locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
7790     locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
7791     locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
7792   } else {
7793     locations->SetInAt(1, Location::Any());
7794   }
7795   // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathX86.
7796   locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
7797 }
7798 
7799 void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
7800   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7801   LocationSummary* locations = instruction->GetLocations();
7802   Location obj_loc = locations->InAt(0);
7803   Register obj = obj_loc.AsRegister<Register>();
7804   Location cls = locations->InAt(1);
7805   Location temp_loc = locations->GetTemp(0);
7806   Register temp = temp_loc.AsRegister<Register>();
7807   const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
7808   DCHECK_GE(num_temps, 1u);
7809   DCHECK_LE(num_temps, 2u);
7810   Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
7811   const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
7812   const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
7813   const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
7814   const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
7815   const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
7816   const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
7817   const uint32_t object_array_data_offset =
7818       mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
7819 
7820   bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction);
7821   SlowPathCode* type_check_slow_path =
7822       new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86(
7823           instruction, is_type_check_slow_path_fatal);
7824   codegen_->AddSlowPath(type_check_slow_path);
7825 
7826   NearLabel done;
7827   // Avoid null check if we know obj is not null.
7828   if (instruction->MustDoNullCheck()) {
7829     __ testl(obj, obj);
7830     __ j(kEqual, &done);
7831   }
7832 
7833   switch (type_check_kind) {
7834     case TypeCheckKind::kExactCheck:
7835     case TypeCheckKind::kArrayCheck: {
7836       // /* HeapReference<Class> */ temp = obj->klass_
7837       GenerateReferenceLoadTwoRegisters(instruction,
7838                                         temp_loc,
7839                                         obj_loc,
7840                                         class_offset,
7841                                         kWithoutReadBarrier);
7842 
7843       if (cls.IsRegister()) {
7844         __ cmpl(temp, cls.AsRegister<Register>());
7845       } else {
7846         DCHECK(cls.IsStackSlot()) << cls;
7847         __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
7848       }
7849       // Jump to slow path for throwing the exception or doing a
7850       // more involved array check.
7851       __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
7852       break;
7853     }
7854 
7855     case TypeCheckKind::kAbstractClassCheck: {
7856       // /* HeapReference<Class> */ temp = obj->klass_
7857       GenerateReferenceLoadTwoRegisters(instruction,
7858                                         temp_loc,
7859                                         obj_loc,
7860                                         class_offset,
7861                                         kWithoutReadBarrier);
7862 
7863       // If the class is abstract, we eagerly fetch the super class of the
7864       // object to avoid doing a comparison we know will fail.
7865       NearLabel loop;
7866       __ Bind(&loop);
7867       // /* HeapReference<Class> */ temp = temp->super_class_
7868       GenerateReferenceLoadOneRegister(instruction,
7869                                        temp_loc,
7870                                        super_offset,
7871                                        maybe_temp2_loc,
7872                                        kWithoutReadBarrier);
7873 
7874       // If the class reference currently in `temp` is null, jump to the slow path to throw the
7875       // exception.
7876       __ testl(temp, temp);
7877       __ j(kZero, type_check_slow_path->GetEntryLabel());
7878 
7879       // Otherwise, compare the classes.
7880       if (cls.IsRegister()) {
7881         __ cmpl(temp, cls.AsRegister<Register>());
7882       } else {
7883         DCHECK(cls.IsStackSlot()) << cls;
7884         __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
7885       }
7886       __ j(kNotEqual, &loop);
7887       break;
7888     }
7889 
7890     case TypeCheckKind::kClassHierarchyCheck: {
7891       // /* HeapReference<Class> */ temp = obj->klass_
7892       GenerateReferenceLoadTwoRegisters(instruction,
7893                                         temp_loc,
7894                                         obj_loc,
7895                                         class_offset,
7896                                         kWithoutReadBarrier);
7897 
7898       // Walk over the class hierarchy to find a match.
7899       NearLabel loop;
7900       __ Bind(&loop);
7901       if (cls.IsRegister()) {
7902         __ cmpl(temp, cls.AsRegister<Register>());
7903       } else {
7904         DCHECK(cls.IsStackSlot()) << cls;
7905         __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
7906       }
7907       __ j(kEqual, &done);
7908 
7909       // /* HeapReference<Class> */ temp = temp->super_class_
7910       GenerateReferenceLoadOneRegister(instruction,
7911                                        temp_loc,
7912                                        super_offset,
7913                                        maybe_temp2_loc,
7914                                        kWithoutReadBarrier);
7915 
7916       // If the class reference currently in `temp` is not null, jump
7917       // back to the beginning of the loop.
7918       __ testl(temp, temp);
7919       __ j(kNotZero, &loop);
7920       // Otherwise, jump to the slow path to throw the exception.
7921       __ jmp(type_check_slow_path->GetEntryLabel());
7922       break;
7923     }
7924 
7925     case TypeCheckKind::kArrayObjectCheck: {
7926       // /* HeapReference<Class> */ temp = obj->klass_
7927       GenerateReferenceLoadTwoRegisters(instruction,
7928                                         temp_loc,
7929                                         obj_loc,
7930                                         class_offset,
7931                                         kWithoutReadBarrier);
7932 
7933       // Do an exact check.
7934       if (cls.IsRegister()) {
7935         __ cmpl(temp, cls.AsRegister<Register>());
7936       } else {
7937         DCHECK(cls.IsStackSlot()) << cls;
7938         __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
7939       }
7940       __ j(kEqual, &done);
7941 
7942       // Otherwise, we need to check that the object's class is a non-primitive array.
7943       // /* HeapReference<Class> */ temp = temp->component_type_
7944       GenerateReferenceLoadOneRegister(instruction,
7945                                        temp_loc,
7946                                        component_offset,
7947                                        maybe_temp2_loc,
7948                                        kWithoutReadBarrier);
7949 
7950       // If the component type is null (i.e. the object is not an array), jump to the slow path to
7951       // throw the exception. Otherwise proceed with the check.
7952       __ testl(temp, temp);
7953       __ j(kZero, type_check_slow_path->GetEntryLabel());
7954 
7955       __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
7956       __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
7957       break;
7958     }
7959 
7960     case TypeCheckKind::kUnresolvedCheck:
7961       // We always go into the type check slow path for the unresolved check case.
7962       // We cannot directly call the CheckCast runtime entry point
7963       // without resorting to a type checking slow path here (i.e. by
7964       // calling InvokeRuntime directly), as it would require
7965       // assigning fixed registers for the inputs of this HCheckCast
7966       // instruction (following the runtime calling convention), which
7967       // might be cluttered by the potential first read barrier
7968       // emission at the beginning of this method.
7969       __ jmp(type_check_slow_path->GetEntryLabel());
7970       break;
7971 
7972     case TypeCheckKind::kInterfaceCheck: {
7973       // Fast path for the interface check. Try to avoid read barriers to improve the fast path.
7974       // We cannot get false positives by doing this.
7975       // /* HeapReference<Class> */ temp = obj->klass_
7976       GenerateReferenceLoadTwoRegisters(instruction,
7977                                         temp_loc,
7978                                         obj_loc,
7979                                         class_offset,
7980                                         kWithoutReadBarrier);
7981 
7982       // /* HeapReference<Class> */ temp = temp->iftable_
7983       GenerateReferenceLoadTwoRegisters(instruction,
7984                                         temp_loc,
7985                                         temp_loc,
7986                                         iftable_offset,
7987                                         kWithoutReadBarrier);
7988       // Iftable is never null.
7989       __ movl(maybe_temp2_loc.AsRegister<Register>(), Address(temp, array_length_offset));
7990       // Maybe poison the `cls` for direct comparison with memory.
7991       __ MaybePoisonHeapReference(cls.AsRegister<Register>());
7992       // Loop through the iftable and check if any class matches.
7993       NearLabel start_loop;
7994       __ Bind(&start_loop);
7995       // Need to subtract first to handle the empty array case.
7996       __ subl(maybe_temp2_loc.AsRegister<Register>(), Immediate(2));
7997       __ j(kNegative, type_check_slow_path->GetEntryLabel());
7998       // Go to next interface if the classes do not match.
7999       __ cmpl(cls.AsRegister<Register>(),
8000               CodeGeneratorX86::ArrayAddress(temp,
8001                                              maybe_temp2_loc,
8002                                              TIMES_4,
8003                                              object_array_data_offset));
8004       __ j(kNotEqual, &start_loop);
8005       // If `cls` was poisoned above, unpoison it.
8006       __ MaybeUnpoisonHeapReference(cls.AsRegister<Register>());
8007       break;
8008     }
8009 
8010     case TypeCheckKind::kBitstringCheck: {
8011       // /* HeapReference<Class> */ temp = obj->klass_
8012       GenerateReferenceLoadTwoRegisters(instruction,
8013                                         temp_loc,
8014                                         obj_loc,
8015                                         class_offset,
8016                                         kWithoutReadBarrier);
8017 
8018       GenerateBitstringTypeCheckCompare(instruction, temp);
8019       __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
8020       break;
8021     }
8022   }
8023   __ Bind(&done);
8024 
8025   __ Bind(type_check_slow_path->GetExitLabel());
8026 }
8027 
8028 void LocationsBuilderX86::VisitMonitorOperation(HMonitorOperation* instruction) {
8029   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
8030       instruction, LocationSummary::kCallOnMainOnly);
8031   InvokeRuntimeCallingConvention calling_convention;
8032   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
8033 }
8034 
8035 void InstructionCodeGeneratorX86::VisitMonitorOperation(HMonitorOperation* instruction) {
8036   codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject
8037                                                  : kQuickUnlockObject,
8038                           instruction,
8039                           instruction->GetDexPc());
8040   if (instruction->IsEnter()) {
8041     CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
8042   } else {
8043     CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
8044   }
8045 }
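// For reference, a Java-level `synchronized (obj) { ... }` block compiles to a
// monitor-enter/monitor-exit pair, each of which reaches the runtime call above
// (kQuickLockObject on enter, kQuickUnlockObject on exit).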
8046 
8047 void LocationsBuilderX86::VisitX86AndNot(HX86AndNot* instruction) {
8048   DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
8049   DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType();
8050   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
8051   locations->SetInAt(0, Location::RequiresRegister());
8052   locations->SetInAt(1, Location::RequiresRegister());
8053   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
8054 }
8055 
8056 void InstructionCodeGeneratorX86::VisitX86AndNot(HX86AndNot* instruction) {
8057   LocationSummary* locations = instruction->GetLocations();
8058   Location first = locations->InAt(0);
8059   Location second = locations->InAt(1);
8060   Location dest = locations->Out();
8061   if (instruction->GetResultType() == DataType::Type::kInt32) {
8062     __ andn(dest.AsRegister<Register>(),
8063             first.AsRegister<Register>(),
8064             second.AsRegister<Register>());
8065   } else {
8066     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
8067     __ andn(dest.AsRegisterPairLow<Register>(),
8068             first.AsRegisterPairLow<Register>(),
8069             second.AsRegisterPairLow<Register>());
8070     __ andn(dest.AsRegisterPairHigh<Register>(),
8071             first.AsRegisterPairHigh<Register>(),
8072             second.AsRegisterPairHigh<Register>());
8073   }
8074 }
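// For reference: BMI's ANDN computes `dest = ~src1 & src2`, so the single andn above replaces
// a separate NOT+AND. The operation is purely bitwise, which is why the 64-bit case can apply
// it to the low and high halves of the register pairs independently.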
8075 
8076 void LocationsBuilderX86::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) {
8077   DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
8078   DCHECK(instruction->GetType() == DataType::Type::kInt32) << instruction->GetType();
8079   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
8080   locations->SetInAt(0, Location::RequiresRegister());
8081   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
8082 }
8083 
8084 void InstructionCodeGeneratorX86::VisitX86MaskOrResetLeastSetBit(
8085     HX86MaskOrResetLeastSetBit* instruction) {
8086   LocationSummary* locations = instruction->GetLocations();
8087   Location src = locations->InAt(0);
8088   Location dest = locations->Out();
8089   DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
8090   switch (instruction->GetOpKind()) {
8091     case HInstruction::kAnd:
8092       __ blsr(dest.AsRegister<Register>(), src.AsRegister<Register>());
8093       break;
8094     case HInstruction::kXor:
8095       __ blsmsk(dest.AsRegister<Register>(), src.AsRegister<Register>());
8096       break;
8097     default:
8098       LOG(FATAL) << "Unreachable";
8099   }
8100 }
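// For reference, the BMI1 instructions used above compute:
//   blsr   dest, src  ->  dest = src & (src - 1)   (reset the lowest set bit)
//   blsmsk dest, src  ->  dest = src ^ (src - 1)   (mask up to and including the lowest set bit)
// e.g. for src = 0b101100, blsr yields 0b101000 and blsmsk yields 0b000111.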
8101 
8102 void LocationsBuilderX86::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
8103 void LocationsBuilderX86::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); }
8104 void LocationsBuilderX86::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); }
8105 
8106 void LocationsBuilderX86::HandleBitwiseOperation(HBinaryOperation* instruction) {
8107   LocationSummary* locations =
8108       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
8109   DCHECK(instruction->GetResultType() == DataType::Type::kInt32
8110          || instruction->GetResultType() == DataType::Type::kInt64);
8111   locations->SetInAt(0, Location::RequiresRegister());
8112   locations->SetInAt(1, Location::Any());
8113   locations->SetOut(Location::SameAsFirstInput());
8114 }
8115 
8116 void InstructionCodeGeneratorX86::VisitAnd(HAnd* instruction) {
8117   HandleBitwiseOperation(instruction);
8118 }
8119 
8120 void InstructionCodeGeneratorX86::VisitOr(HOr* instruction) {
8121   HandleBitwiseOperation(instruction);
8122 }
8123 
8124 void InstructionCodeGeneratorX86::VisitXor(HXor* instruction) {
8125   HandleBitwiseOperation(instruction);
8126 }
8127 
8128 void InstructionCodeGeneratorX86::HandleBitwiseOperation(HBinaryOperation* instruction) {
8129   LocationSummary* locations = instruction->GetLocations();
8130   Location first = locations->InAt(0);
8131   Location second = locations->InAt(1);
8132   DCHECK(first.Equals(locations->Out()));
8133 
8134   if (instruction->GetResultType() == DataType::Type::kInt32) {
8135     if (second.IsRegister()) {
8136       if (instruction->IsAnd()) {
8137         __ andl(first.AsRegister<Register>(), second.AsRegister<Register>());
8138       } else if (instruction->IsOr()) {
8139         __ orl(first.AsRegister<Register>(), second.AsRegister<Register>());
8140       } else {
8141         DCHECK(instruction->IsXor());
8142         __ xorl(first.AsRegister<Register>(), second.AsRegister<Register>());
8143       }
8144     } else if (second.IsConstant()) {
8145       if (instruction->IsAnd()) {
8146         __ andl(first.AsRegister<Register>(),
8147                 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
8148       } else if (instruction->IsOr()) {
8149         __ orl(first.AsRegister<Register>(),
8150                Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
8151       } else {
8152         DCHECK(instruction->IsXor());
8153         __ xorl(first.AsRegister<Register>(),
8154                 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
8155       }
8156     } else {
8157       if (instruction->IsAnd()) {
8158         __ andl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
8159       } else if (instruction->IsOr()) {
8160         __ orl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
8161       } else {
8162         DCHECK(instruction->IsXor());
8163         __ xorl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
8164       }
8165     }
8166   } else {
8167     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
8168     if (second.IsRegisterPair()) {
8169       if (instruction->IsAnd()) {
8170         __ andl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
8171         __ andl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
8172       } else if (instruction->IsOr()) {
8173         __ orl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
8174         __ orl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
8175       } else {
8176         DCHECK(instruction->IsXor());
8177         __ xorl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
8178         __ xorl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
8179       }
8180     } else if (second.IsDoubleStackSlot()) {
8181       if (instruction->IsAnd()) {
8182         __ andl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
8183         __ andl(first.AsRegisterPairHigh<Register>(),
8184                 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
8185       } else if (instruction->IsOr()) {
8186         __ orl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
8187         __ orl(first.AsRegisterPairHigh<Register>(),
8188                 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
8189       } else {
8190         DCHECK(instruction->IsXor());
8191         __ xorl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
8192         __ xorl(first.AsRegisterPairHigh<Register>(),
8193                 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
8194       }
8195     } else {
8196       DCHECK(second.IsConstant()) << second;
8197       int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
8198       int32_t low_value = Low32Bits(value);
8199       int32_t high_value = High32Bits(value);
8200       Immediate low(low_value);
8201       Immediate high(high_value);
8202       Register first_low = first.AsRegisterPairLow<Register>();
8203       Register first_high = first.AsRegisterPairHigh<Register>();
8204       if (instruction->IsAnd()) {
8205         if (low_value == 0) {
8206           __ xorl(first_low, first_low);
8207         } else if (low_value != -1) {
8208           __ andl(first_low, low);
8209         }
8210         if (high_value == 0) {
8211           __ xorl(first_high, first_high);
8212         } else if (high_value != -1) {
8213           __ andl(first_high, high);
8214         }
8215       } else if (instruction->IsOr()) {
8216         if (low_value != 0) {
8217           __ orl(first_low, low);
8218         }
8219         if (high_value != 0) {
8220           __ orl(first_high, high);
8221         }
8222       } else {
8223         DCHECK(instruction->IsXor());
8224         if (low_value != 0) {
8225           __ xorl(first_low, low);
8226         }
8227         if (high_value != 0) {
8228           __ xorl(first_high, high);
8229         }
8230       }
8231     }
8232   }
8233 }
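// Example of the long-constant special-casing above: `x & 0xFFFFFFFF00000000L` has
// low_value == 0 and high_value == -1, so only `xorl first_low, first_low` is emitted and the
// high half is left untouched; `x | 0` and `x ^ 0` emit no code at all.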
8234 
8235 void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister(
8236     HInstruction* instruction,
8237     Location out,
8238     uint32_t offset,
8239     Location maybe_temp,
8240     ReadBarrierOption read_barrier_option) {
8241   Register out_reg = out.AsRegister<Register>();
8242   if (read_barrier_option == kWithReadBarrier) {
8243     CHECK(kEmitCompilerReadBarrier);
8244     if (kUseBakerReadBarrier) {
8245       // Load with fast path based Baker's read barrier.
8246       // /* HeapReference<Object> */ out = *(out + offset)
8247       codegen_->GenerateFieldLoadWithBakerReadBarrier(
8248           instruction, out, out_reg, offset, /* needs_null_check= */ false);
8249     } else {
8250       // Load with slow path based read barrier.
8251       // Save the value of `out` into `maybe_temp` before overwriting it
8252       // in the following move operation, as we will need it for the
8253       // read barrier below.
8254       DCHECK(maybe_temp.IsRegister()) << maybe_temp;
8255       __ movl(maybe_temp.AsRegister<Register>(), out_reg);
8256       // /* HeapReference<Object> */ out = *(out + offset)
8257       __ movl(out_reg, Address(out_reg, offset));
8258       codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
8259     }
8260   } else {
8261     // Plain load with no read barrier.
8262     // /* HeapReference<Object> */ out = *(out + offset)
8263     __ movl(out_reg, Address(out_reg, offset));
8264     __ MaybeUnpoisonHeapReference(out_reg);
8265   }
8266 }
8267 
8268 void InstructionCodeGeneratorX86::GenerateReferenceLoadTwoRegisters(
8269     HInstruction* instruction,
8270     Location out,
8271     Location obj,
8272     uint32_t offset,
8273     ReadBarrierOption read_barrier_option) {
8274   Register out_reg = out.AsRegister<Register>();
8275   Register obj_reg = obj.AsRegister<Register>();
8276   if (read_barrier_option == kWithReadBarrier) {
8277     CHECK(kEmitCompilerReadBarrier);
8278     if (kUseBakerReadBarrier) {
8279       // Load with fast path based Baker's read barrier.
8280       // /* HeapReference<Object> */ out = *(obj + offset)
8281       codegen_->GenerateFieldLoadWithBakerReadBarrier(
8282           instruction, out, obj_reg, offset, /* needs_null_check= */ false);
8283     } else {
8284       // Load with slow path based read barrier.
8285       // /* HeapReference<Object> */ out = *(obj + offset)
8286       __ movl(out_reg, Address(obj_reg, offset));
8287       codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
8288     }
8289   } else {
8290     // Plain load with no read barrier.
8291     // /* HeapReference<Object> */ out = *(obj + offset)
8292     __ movl(out_reg, Address(obj_reg, offset));
8293     __ MaybeUnpoisonHeapReference(out_reg);
8294   }
8295 }
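// Note the difference from the one-register variant above: `obj` stays live as the base
// register here, so no `maybe_temp` is needed to preserve the original reference for the
// non-Baker slow-path read barrier.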
8296 
8297 void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(
8298     HInstruction* instruction,
8299     Location root,
8300     const Address& address,
8301     Label* fixup_label,
8302     ReadBarrierOption read_barrier_option) {
8303   Register root_reg = root.AsRegister<Register>();
8304   if (read_barrier_option == kWithReadBarrier) {
8305     DCHECK(kEmitCompilerReadBarrier);
8306     if (kUseBakerReadBarrier) {
8307       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
8308       // Baker's read barriers are used:
8309       //
8310       //   root = obj.field;
8311       //   temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
8312       //   if (temp != null) {
8313       //     root = temp(root)
8314       //   }
8315 
8316       // /* GcRoot<mirror::Object> */ root = *address
8317       __ movl(root_reg, address);
8318       if (fixup_label != nullptr) {
8319         __ Bind(fixup_label);
8320       }
8321       static_assert(
8322           sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
8323           "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
8324           "have different sizes.");
8325       static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
8326                     "art::mirror::CompressedReference<mirror::Object> and int32_t "
8327                     "have different sizes.");
8328 
8329       // Slow path marking the GC root `root`.
8330       SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathX86(
8331           instruction, root, /* unpoison_ref_before_marking= */ false);
8332       codegen_->AddSlowPath(slow_path);
8333 
8334       // Test the entrypoint (`Thread::Current()->pReadBarrierMarkReg ## root.reg()`).
8335       const int32_t entry_point_offset =
8336           Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(root.reg());
8337       __ fs()->cmpl(Address::Absolute(entry_point_offset), Immediate(0));
8338       // The entrypoint is null when the GC is not marking.
8339       __ j(kNotEqual, slow_path->GetEntryLabel());
8340       __ Bind(slow_path->GetExitLabel());
8341     } else {
8342       // GC root loaded through a slow path for read barriers other
8343       // than Baker's.
8344       // /* GcRoot<mirror::Object>* */ root = address
8345       __ leal(root_reg, address);
8346       if (fixup_label != nullptr) {
8347         __ Bind(fixup_label);
8348       }
8349       // /* mirror::Object* */ root = root->Read()
8350       codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
8351     }
8352   } else {
8353     // Plain GC root load with no read barrier.
8354     // /* GcRoot<mirror::Object> */ root = *address
8355     __ movl(root_reg, address);
8356     if (fixup_label != nullptr) {
8357       __ Bind(fixup_label);
8358     }
8359     // Note that GC roots are not affected by heap poisoning, thus we
8360     // do not have to unpoison `root_reg` here.
8361   }
8362 }
8363 
8364 void CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
8365                                                              Location ref,
8366                                                              Register obj,
8367                                                              uint32_t offset,
8368                                                              bool needs_null_check) {
8369   DCHECK(kEmitCompilerReadBarrier);
8370   DCHECK(kUseBakerReadBarrier);
8371 
8372   // /* HeapReference<Object> */ ref = *(obj + offset)
8373   Address src(obj, offset);
8374   GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
8375 }
8376 
8377 void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
8378                                                              Location ref,
8379                                                              Register obj,
8380                                                              uint32_t data_offset,
8381                                                              Location index,
8382                                                              bool needs_null_check) {
8383   DCHECK(kEmitCompilerReadBarrier);
8384   DCHECK(kUseBakerReadBarrier);
8385 
8386   static_assert(
8387       sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
8388       "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
8389   // /* HeapReference<Object> */ ref =
8390   //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
8391   Address src = CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset);
8392   GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
8393 }
8394 
8395 void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
8396                                                                  Location ref,
8397                                                                  Register obj,
8398                                                                  const Address& src,
8399                                                                  bool needs_null_check,
8400                                                                  bool always_update_field,
8401                                                                  Register* temp) {
8402   DCHECK(kEmitCompilerReadBarrier);
8403   DCHECK(kUseBakerReadBarrier);
8404 
8405   // In slow path based read barriers, the read barrier call is
8406   // inserted after the original load. However, in fast path based
8407   // Baker's read barriers, we need to perform the load of
8408   // mirror::Object::monitor_ *before* the original reference load.
8409   // This load-load ordering is required by the read barrier.
8410   // The fast path/slow path (for Baker's algorithm) should look like:
8411   //
8412   //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
8413   //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
8414   //   HeapReference<Object> ref = *src;  // Original reference load.
8415   //   bool is_gray = (rb_state == ReadBarrier::GrayState());
8416   //   if (is_gray) {
8417   //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
8418   //   }
8419   //
8420   // Note: the original implementation in ReadBarrier::Barrier is
8421   // slightly more complex as:
8422   // - it implements the load-load fence using a data dependency on
8423   //   the high-bits of rb_state, which are expected to be all zeroes
8424   //   (we use CodeGeneratorX86::GenerateMemoryBarrier instead here,
8425   //   which is a no-op thanks to the x86 memory model);
8426   // - it performs additional checks that we do not do here for
8427   //   performance reasons.
8428 
8429   Register ref_reg = ref.AsRegister<Register>();
8430   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
8431 
8432   // Given the numeric representation, it's enough to check the low bit of the rb_state.
8433   static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
8434   static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
8435   constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
8436   constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
8437   constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
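  // As an illustration (not verified here): if kReadBarrierStateShift were 28, the gray bit
  // would sit in bit 4 of byte 3 of the monitor word, so the testb below would check the
  // immediate 0x10 against Address(obj, monitor_offset + 3).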

  // if (rb_state == ReadBarrier::GrayState())
  //   ref = ReadBarrier::Mark(ref);
  // At this point, just do the "if" and make sure that flags are preserved until the branch.
  __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
  if (needs_null_check) {
    MaybeRecordImplicitNullCheck(instruction);
  }

  // Load fence to prevent load-load reordering.
  // Note that this is a no-op, thanks to the x86 memory model.
  GenerateMemoryBarrier(MemBarrierKind::kLoadAny);

  // The actual reference load.
  // /* HeapReference<Object> */ ref = *src
  __ movl(ref_reg, src);  // Flags are unaffected.

  // Note: Reference unpoisoning modifies the flags, so we need to delay it until after the branch.
  // Slow path marking the object `ref` when it is gray.
  SlowPathCode* slow_path;
  if (always_update_field) {
    DCHECK(temp != nullptr);
    slow_path = new (GetScopedAllocator()) ReadBarrierMarkAndUpdateFieldSlowPathX86(
        instruction, ref, obj, src, /* unpoison_ref_before_marking= */ true, *temp);
  } else {
    slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathX86(
        instruction, ref, /* unpoison_ref_before_marking= */ true);
  }
  AddSlowPath(slow_path);

  // We have done the "if" of the gray bit check above, now branch based on the flags.
  __ j(kNotZero, slow_path->GetEntryLabel());

  // Object* ref = ref_addr->AsMirrorPtr()
  __ MaybeUnpoisonHeapReference(ref_reg);

  __ Bind(slow_path->GetExitLabel());
}

void CodeGeneratorX86::GenerateReadBarrierSlow(HInstruction* instruction,
                                               Location out,
                                               Location ref,
                                               Location obj,
                                               uint32_t offset,
                                               Location index) {
  DCHECK(kEmitCompilerReadBarrier);

  // Insert a slow path based read barrier *after* the reference load.
  //
  // If heap poisoning is enabled, the unpoisoning of the loaded
  // reference will be carried out by the runtime within the slow
  // path.
  //
  // Note that `ref` currently does not get unpoisoned (when heap
  // poisoning is enabled), which is alright as the `ref` argument is
  // not used by the artReadBarrierSlow entry point.
  //
  // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
  SlowPathCode* slow_path = new (GetScopedAllocator())
      ReadBarrierForHeapReferenceSlowPathX86(instruction, out, ref, obj, offset, index);
  AddSlowPath(slow_path);

  __ jmp(slow_path->GetEntryLabel());
  __ Bind(slow_path->GetExitLabel());
}

void CodeGeneratorX86::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
                                                    Location out,
                                                    Location ref,
                                                    Location obj,
                                                    uint32_t offset,
                                                    Location index) {
  if (kEmitCompilerReadBarrier) {
    // Baker's read barriers shall be handled by the fast path
    // (CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier).
    DCHECK(!kUseBakerReadBarrier);
    // If heap poisoning is enabled, unpoisoning will be taken care of
    // by the runtime within the slow path.
    GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
  } else if (kPoisonHeapReferences) {
    __ UnpoisonHeapReference(out.AsRegister<Register>());
  }
}

void CodeGeneratorX86::GenerateReadBarrierForRootSlow(HInstruction* instruction,
                                                      Location out,
                                                      Location root) {
  DCHECK(kEmitCompilerReadBarrier);

  // Insert a slow path based read barrier *after* the GC root load.
  //
  // Note that GC roots are not affected by heap poisoning, so we do
  // not need to do anything special for this here.
  SlowPathCode* slow_path =
      new (GetScopedAllocator()) ReadBarrierForRootSlowPathX86(instruction, out, root);
  AddSlowPath(slow_path);

  __ jmp(slow_path->GetEntryLabel());
  __ Bind(slow_path->GetExitLabel());
}

void LocationsBuilderX86::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
  // Nothing to do, this should be removed during prepare for register allocator.
  LOG(FATAL) << "Unreachable";
}

void InstructionCodeGeneratorX86::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
  // Nothing to do, this should be removed during prepare for register allocator.
  LOG(FATAL) << "Unreachable";
}

// Simple implementation of packed switch - generate cascaded compare/jumps.
void LocationsBuilderX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
  LocationSummary* locations =
      new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
  locations->SetInAt(0, Location::RequiresRegister());
}

void InstructionCodeGeneratorX86::GenPackedSwitchWithCompares(Register value_reg,
                                                              int32_t lower_bound,
                                                              uint32_t num_entries,
                                                              HBasicBlock* switch_block,
                                                              HBasicBlock* default_block) {
  // Figure out the correct compare values and jump conditions.
  // Handle the first compare/branch as a special case because it might
  // jump to the default case.
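  // As an illustration (not emitted verbatim), a packed switch with lower_bound == 10 and
  // three entries, with the value in value_reg, lowers roughly to:
  //   cmpl value_reg, 10 ; jl default ; je case_10
  //   cmpl value_reg, 12 ; jl case_11 ; je case_12
  //   jmp default            // elided when default_block is the fall-through successor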
  DCHECK_GT(num_entries, 2u);
  Condition first_condition;
  uint32_t index;
  const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors();
  if (lower_bound != 0) {
    first_condition = kLess;
    __ cmpl(value_reg, Immediate(lower_bound));
    __ j(first_condition, codegen_->GetLabelOf(default_block));
    __ j(kEqual, codegen_->GetLabelOf(successors[0]));

    index = 1;
  } else {
    // Handle all the compare/jumps below.
    first_condition = kBelow;
    index = 0;
  }

  // Handle the rest of the compare/jumps.
  for (; index + 1 < num_entries; index += 2) {
    int32_t compare_to_value = lower_bound + index + 1;
    __ cmpl(value_reg, Immediate(compare_to_value));
    // Jump to successors[index] if value < case_value[index + 1],
    // which at this point means value == case_value[index].
    __ j(first_condition, codegen_->GetLabelOf(successors[index]));
    // Jump to successors[index + 1] if value == case_value[index + 1].
    __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
  }

  if (index != num_entries) {
    // There is an odd number of entries. Handle the last one.
    DCHECK_EQ(index + 1, num_entries);
    __ cmpl(value_reg, Immediate(lower_bound + index));
    __ j(kEqual, codegen_->GetLabelOf(successors[index]));
  }

  // And the default for any other value.
  if (!codegen_->GoesToNextBlock(switch_block, default_block)) {
    __ jmp(codegen_->GetLabelOf(default_block));
  }
}

void InstructionCodeGeneratorX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
  int32_t lower_bound = switch_instr->GetStartValue();
  uint32_t num_entries = switch_instr->GetNumEntries();
  LocationSummary* locations = switch_instr->GetLocations();
  Register value_reg = locations->InAt(0).AsRegister<Register>();

  GenPackedSwitchWithCompares(value_reg,
                              lower_bound,
                              num_entries,
                              switch_instr->GetBlock(),
                              switch_instr->GetDefaultBlock());
}

void LocationsBuilderX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
  LocationSummary* locations =
      new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
  locations->SetInAt(0, Location::RequiresRegister());

  // Constant area pointer.
  locations->SetInAt(1, Location::RequiresRegister());

  // And the temporary we need.
  locations->AddTemp(Location::RequiresRegister());
}

void InstructionCodeGeneratorX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
  int32_t lower_bound = switch_instr->GetStartValue();
  uint32_t num_entries = switch_instr->GetNumEntries();
  LocationSummary* locations = switch_instr->GetLocations();
  Register value_reg = locations->InAt(0).AsRegister<Register>();
  HBasicBlock* default_block = switch_instr->GetDefaultBlock();

  if (num_entries <= kPackedSwitchJumpTableThreshold) {
    GenPackedSwitchWithCompares(value_reg,
                                lower_bound,
                                num_entries,
                                switch_instr->GetBlock(),
                                default_block);
    return;
  }

  // Above the threshold, use a jump table stored in the constant area.
  Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
  Register constant_area = locations->InAt(1).AsRegister<Register>();

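  // The sequence emitted below is roughly (illustrative only, not checked against the
  // assembler):
  //   leal  temp_reg, [value_reg - lower_bound]     // bias removal, when lower_bound != 0
  //   cmpl  value_reg, num_entries - 1
  //   ja    default_block
  //   movl  temp_reg, [constant_area + value_reg * 4 + table_offset]
  //   addl  temp_reg, constant_area
  //   jmp   temp_reg
  // where each table entry holds (target - base method address), so adding the register that
  // materialized the base method address yields an absolute branch target.
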
  // Remove the bias, if needed.
  if (lower_bound != 0) {
    __ leal(temp_reg, Address(value_reg, -lower_bound));
    value_reg = temp_reg;
  }

  // Is the value in range?
  DCHECK_GE(num_entries, 1u);
  __ cmpl(value_reg, Immediate(num_entries - 1));
  __ j(kAbove, codegen_->GetLabelOf(default_block));

  // We are in the range of the table.
  // Load (target-constant_area) from the jump table, indexing by the value.
  __ movl(temp_reg, codegen_->LiteralCaseTable(switch_instr, constant_area, value_reg));

  // Compute the actual target address by adding in constant_area.
  __ addl(temp_reg, constant_area);

  // And jump.
  __ jmp(temp_reg);
}

void LocationsBuilderX86::VisitX86ComputeBaseMethodAddress(
    HX86ComputeBaseMethodAddress* insn) {
  LocationSummary* locations =
      new (GetGraph()->GetAllocator()) LocationSummary(insn, LocationSummary::kNoCall);
  locations->SetOut(Location::RequiresRegister());
}

void InstructionCodeGeneratorX86::VisitX86ComputeBaseMethodAddress(
    HX86ComputeBaseMethodAddress* insn) {
  LocationSummary* locations = insn->GetLocations();
  Register reg = locations->Out().AsRegister<Register>();

  // Generate call to next instruction.
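  // There is no direct way to read EIP on 32-bit x86, so we call the next instruction and
  // pop the pushed return address, which is exactly the address of that instruction.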
  Label next_instruction;
  __ call(&next_instruction);
  __ Bind(&next_instruction);

  // Remember this offset for later use with constant area.
  codegen_->AddMethodAddressOffset(insn, GetAssembler()->CodeSize());

  // Grab the return address off the stack.
  __ popl(reg);
}

void LocationsBuilderX86::VisitX86LoadFromConstantTable(
    HX86LoadFromConstantTable* insn) {
  LocationSummary* locations =
      new (GetGraph()->GetAllocator()) LocationSummary(insn, LocationSummary::kNoCall);

  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::ConstantLocation(insn->GetConstant()));

  // If the constant is emitted at its use site, it does not need to be materialized here;
  // only the inputs need to be set.
  if (insn->IsEmittedAtUseSite()) {
    return;
  }

  switch (insn->GetType()) {
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetOut(Location::RequiresFpuRegister());
      break;

    case DataType::Type::kInt32:
      locations->SetOut(Location::RequiresRegister());
      break;

    default:
      LOG(FATAL) << "Unsupported x86 constant area type " << insn->GetType();
  }
}

void InstructionCodeGeneratorX86::VisitX86LoadFromConstantTable(HX86LoadFromConstantTable* insn) {
  if (insn->IsEmittedAtUseSite()) {
    return;
  }

  LocationSummary* locations = insn->GetLocations();
  Location out = locations->Out();
  Register const_area = locations->InAt(0).AsRegister<Register>();
  HConstant* value = insn->GetConstant();

  switch (insn->GetType()) {
    case DataType::Type::kFloat32:
      __ movss(out.AsFpuRegister<XmmRegister>(),
               codegen_->LiteralFloatAddress(
                  value->AsFloatConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
      break;

    case DataType::Type::kFloat64:
      __ movsd(out.AsFpuRegister<XmmRegister>(),
               codegen_->LiteralDoubleAddress(
                  value->AsDoubleConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
      break;

    case DataType::Type::kInt32:
      __ movl(out.AsRegister<Register>(),
              codegen_->LiteralInt32Address(
                  value->AsIntConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
      break;

    default:
      LOG(FATAL) << "Unsupported x86 constant area type " << insn->GetType();
  }
}

/**
 * Class to handle late fixup of offsets into constant area.
 */
class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
 public:
  RIPFixup(CodeGeneratorX86& codegen,
           HX86ComputeBaseMethodAddress* base_method_address,
           size_t offset)
      : codegen_(&codegen),
        base_method_address_(base_method_address),
        offset_into_constant_area_(offset) {}

 protected:
  void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }

  CodeGeneratorX86* codegen_;
  HX86ComputeBaseMethodAddress* base_method_address_;

 private:
  void Process(const MemoryRegion& region, int pos) override {
    // Patch the correct offset for the instruction. The place to patch is the
    // last 4 bytes of the instruction.
    // The value to patch is the distance from the address computed by the
    // HX86ComputeBaseMethodAddress instruction to the referenced offset in the constant area.
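    // For example (illustrative numbers only): if the constant area starts at code offset
    // 0x80, the literal sits 8 bytes into it, and the base method address corresponds to
    // code offset 0x20, the patched displacement is 0x80 + 8 - 0x20 = 0x68.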
    int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
    int32_t relative_position =
        constant_offset - codegen_->GetMethodAddressOffset(base_method_address_);

    // Patch in the right value.
    region.StoreUnaligned<int32_t>(pos - 4, relative_position);
  }

  // Location in constant area that the fixup refers to.
  int32_t offset_into_constant_area_;
};

/**
 * Class to handle late fixup of offsets to a jump table that will be created in the
 * constant area.
 */
class JumpTableRIPFixup : public RIPFixup {
 public:
  JumpTableRIPFixup(CodeGeneratorX86& codegen, HX86PackedSwitch* switch_instr)
      : RIPFixup(codegen, switch_instr->GetBaseMethodAddress(), static_cast<size_t>(-1)),
        switch_instr_(switch_instr) {}

  void CreateJumpTable() {
    X86Assembler* assembler = codegen_->GetAssembler();

    // Ensure that the reference to the jump table has the correct offset.
    const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
    SetOffset(offset_in_constant_table);

    // The label values in the jump table are computed relative to the
    // instruction addressing the constant area.
    const int32_t relative_offset = codegen_->GetMethodAddressOffset(base_method_address_);

    // Populate the jump table with the target offsets.
    int32_t num_entries = switch_instr_->GetNumEntries();
    HBasicBlock* block = switch_instr_->GetBlock();
    const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
    // Each entry is the target's code offset minus the code offset of the
    // base method address instruction.
    for (int32_t i = 0; i < num_entries; i++) {
      HBasicBlock* b = successors[i];
      Label* l = codegen_->GetLabelOf(b);
      DCHECK(l->IsBound());
      int32_t offset_to_block = l->Position() - relative_offset;
      assembler->AppendInt32(offset_to_block);
    }
  }

 private:
  const HX86PackedSwitch* switch_instr_;
};

void CodeGeneratorX86::Finalize(CodeAllocator* allocator) {
  // Generate the constant area if needed.
  X86Assembler* assembler = GetAssembler();

  if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
    // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8
    // byte values.
    assembler->Align(4, 0);
    constant_area_start_ = assembler->CodeSize();

    // Populate any jump tables.
    for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) {
      jump_table->CreateJumpTable();
    }

    // And now add the constant area to the generated code.
    assembler->AddConstantArea();
  }

  // And finish up.
  CodeGenerator::Finalize(allocator);
}

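// Each helper below returns an Address whose 32-bit displacement is only a placeholder; the
// attached RIPFixup (see above) patches it once the constant area has been placed.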
Address CodeGeneratorX86::LiteralDoubleAddress(double v,
                                               HX86ComputeBaseMethodAddress* method_base,
                                               Register reg) {
  AssemblerFixup* fixup =
      new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddDouble(v));
  return Address(reg, kPlaceholder32BitOffset, fixup);
}

Address CodeGeneratorX86::LiteralFloatAddress(float v,
                                              HX86ComputeBaseMethodAddress* method_base,
                                              Register reg) {
  AssemblerFixup* fixup =
      new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddFloat(v));
  return Address(reg, kPlaceholder32BitOffset, fixup);
}

Address CodeGeneratorX86::LiteralInt32Address(int32_t v,
                                              HX86ComputeBaseMethodAddress* method_base,
                                              Register reg) {
  AssemblerFixup* fixup =
      new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddInt32(v));
  return Address(reg, kPlaceholder32BitOffset, fixup);
}

Address CodeGeneratorX86::LiteralInt64Address(int64_t v,
                                              HX86ComputeBaseMethodAddress* method_base,
                                              Register reg) {
  AssemblerFixup* fixup =
      new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddInt64(v));
  return Address(reg, kPlaceholder32BitOffset, fixup);
}

void CodeGeneratorX86::Load32BitValue(Register dest, int32_t value) {
  if (value == 0) {
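    // xorl has a shorter encoding than movl with an immediate zero; note that, unlike movl,
    // it also clobbers EFLAGS.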
    __ xorl(dest, dest);
  } else {
    __ movl(dest, Immediate(value));
  }
}

void CodeGeneratorX86::Compare32BitValue(Register dest, int32_t value) {
  if (value == 0) {
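    // testl against itself has a shorter encoding than cmpl with an immediate zero and sets
    // the flags identically for a comparison against zero.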
    __ testl(dest, dest);
  } else {
    __ cmpl(dest, Immediate(value));
  }
}

void CodeGeneratorX86::GenerateIntCompare(Location lhs, Location rhs) {
  Register lhs_reg = lhs.AsRegister<Register>();
  GenerateIntCompare(lhs_reg, rhs);
}

void CodeGeneratorX86::GenerateIntCompare(Register lhs, Location rhs) {
  if (rhs.IsConstant()) {
    int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
    Compare32BitValue(lhs, value);
  } else if (rhs.IsStackSlot()) {
    __ cmpl(lhs, Address(ESP, rhs.GetStackIndex()));
  } else {
    __ cmpl(lhs, rhs.AsRegister<Register>());
  }
}

Address CodeGeneratorX86::ArrayAddress(Register obj,
                                       Location index,
                                       ScaleFactor scale,
                                       uint32_t data_offset) {
  return index.IsConstant() ?
      Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) :
      Address(obj, index.AsRegister<Register>(), scale, data_offset);
}

Address CodeGeneratorX86::LiteralCaseTable(HX86PackedSwitch* switch_instr,
                                           Register reg,
                                           Register value) {
  // Create a fixup to be used to create and address the jump table.
  JumpTableRIPFixup* table_fixup =
      new (GetGraph()->GetAllocator()) JumpTableRIPFixup(*this, switch_instr);

  // Remember the fixup so that Finalize() can populate the jump table.
  fixups_to_jump_tables_.push_back(table_fixup);

  // We want a scaled address, as we are extracting the correct offset from the table.
  return Address(reg, value, TIMES_4, kPlaceholder32BitOffset, table_fixup);
}

// TODO: target as memory.
void CodeGeneratorX86::MoveFromReturnRegister(Location target, DataType::Type type) {
  if (!target.IsValid()) {
    DCHECK_EQ(type, DataType::Type::kVoid);
    return;
  }

  DCHECK_NE(type, DataType::Type::kVoid);

  Location return_loc = InvokeDexCallingConventionVisitorX86().GetReturnLocation(type);
  if (target.Equals(return_loc)) {
    return;
  }

  // TODO: Consider pairs in the parallel move resolver, then this could be nicely merged
  //       with the else branch.
  if (type == DataType::Type::kInt64) {
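    // A 64-bit result is returned in a register pair on x86 (EAX/EDX in this calling
    // convention), so move the low and high halves as two independent 32-bit moves.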
    HParallelMove parallel_move(GetGraph()->GetAllocator());
    parallel_move.AddMove(return_loc.ToLow(), target.ToLow(), DataType::Type::kInt32, nullptr);
    parallel_move.AddMove(return_loc.ToHigh(), target.ToHigh(), DataType::Type::kInt32, nullptr);
    GetMoveResolver()->EmitNativeCode(&parallel_move);
  } else {
    // Let the parallel move resolver take care of all of this.
    HParallelMove parallel_move(GetGraph()->GetAllocator());
    parallel_move.AddMove(return_loc, target, type, nullptr);
    GetMoveResolver()->EmitNativeCode(&parallel_move);
  }
}

void CodeGeneratorX86::PatchJitRootUse(uint8_t* code,
                                       const uint8_t* roots_data,
                                       const PatchInfo<Label>& info,
                                       uint64_t index_in_table) const {
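  // The root is referenced through a 32-bit immediate in the generated code; patch that
  // immediate with the address of the corresponding entry in the JIT roots table.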
  uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
  uintptr_t address =
      reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
  using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t;
  reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
      dchecked_integral_cast<uint32_t>(address);
}

void CodeGeneratorX86::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
  for (const PatchInfo<Label>& info : jit_string_patches_) {
    StringReference string_reference(info.target_dex_file, dex::StringIndex(info.offset_or_index));
    uint64_t index_in_table = GetJitStringRootIndex(string_reference);
    PatchJitRootUse(code, roots_data, info, index_in_table);
  }

  for (const PatchInfo<Label>& info : jit_class_patches_) {
    TypeReference type_reference(info.target_dex_file, dex::TypeIndex(info.offset_or_index));
    uint64_t index_in_table = GetJitClassRootIndex(type_reference);
    PatchJitRootUse(code, roots_data, info, index_in_table);
  }
}

void LocationsBuilderX86::VisitIntermediateAddress(HIntermediateAddress* instruction
                                                   ATTRIBUTE_UNUSED) {
  LOG(FATAL) << "Unreachable";
}

void InstructionCodeGeneratorX86::VisitIntermediateAddress(HIntermediateAddress* instruction
                                                           ATTRIBUTE_UNUSED) {
  LOG(FATAL) << "Unreachable";
}

bool LocationsBuilderX86::CpuHasAvxFeatureFlag() {
  return codegen_->GetInstructionSetFeatures().HasAVX();
}

bool LocationsBuilderX86::CpuHasAvx2FeatureFlag() {
  return codegen_->GetInstructionSetFeatures().HasAVX2();
}

bool InstructionCodeGeneratorX86::CpuHasAvxFeatureFlag() {
  return codegen_->GetInstructionSetFeatures().HasAVX();
}

bool InstructionCodeGeneratorX86::CpuHasAvx2FeatureFlag() {
  return codegen_->GetInstructionSetFeatures().HasAVX2();
}

#undef __

}  // namespace x86
}  // namespace art