/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "code_generator_x86_64.h"

#include "art_method-inl.h"
#include "class_table.h"
#include "code_generator_utils.h"
#include "compiled_method.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "gc/accounting/card_table.h"
#include "gc/space/image_space.h"
#include "heap_poisoning.h"
#include "intrinsics.h"
#include "intrinsics_x86_64.h"
#include "jit/profiling_info.h"
#include "linker/linker_patch.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
#include "mirror/object_reference.h"
#include "scoped_thread_state_change-inl.h"
#include "thread.h"
#include "utils/assembler.h"
#include "utils/stack_checks.h"
#include "utils/x86_64/assembler_x86_64.h"
#include "utils/x86_64/managed_register_x86_64.h"

namespace art {

template<class MirrorType>
class GcRoot;

namespace x86_64 {

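// Informational note on the two constants below: the current ArtMethod* is received in RDI (the
// first integer argument register in the System V AMD64 calling convention) and is spilled at the
// bottom of the managed frame (stack offset 0).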
static constexpr int kCurrentMethodStackOffset = 0;
static constexpr Register kMethodRegisterArgument = RDI;
// The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump
// table version generates 7 instructions and num_entries literals. The compare/jump sequence
// generates less code/data for a small num_entries.
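// Illustrative cost comparison based on the estimate above: for 4 entries the compare/jump
// sequence needs about 6 instructions, whereas a jump table would need 7 instructions plus
// 4 table entries, so compare/jump wins below the threshold.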
static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;

static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 };
static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 };

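// Presumably the C2 condition-code bit (bit 10) of the x87 FPU status word, tested to detect an
// incomplete partial remainder after FPREM-style instructions.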
static constexpr int kC2ConditionMask = 0x400;

static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
  // Custom calling convention: RAX serves as both input and output.
  RegisterSet caller_saves = RegisterSet::Empty();
  caller_saves.Add(Location::RegisterLocation(RAX));
  return caller_saves;
}

// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())->  // NOLINT
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize, x).Int32Value()
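// Illustrative expansion of the `__` shorthand: inside a slow path's EmitNativeCode,
// `__ Bind(GetEntryLabel())` becomes
// `down_cast<X86_64Assembler*>(codegen->GetAssembler())->Bind(GetEntryLabel())`.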

class NullCheckSlowPathX86_64 : public SlowPathCode {
 public:
  explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    if (instruction_->CanThrowIntoCatchBlock()) {
      // Live registers will be restored in the catch block if caught.
      SaveLiveRegisters(codegen, instruction_->GetLocations());
    }
    x86_64_codegen->InvokeRuntime(kQuickThrowNullPointer,
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
    CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
  }

  bool IsFatal() const override { return true; }

  const char* GetDescription() const override { return "NullCheckSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86_64);
};

class DivZeroCheckSlowPathX86_64 : public SlowPathCode {
 public:
  explicit DivZeroCheckSlowPathX86_64(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    x86_64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
  }

  bool IsFatal() const override { return true; }

  const char* GetDescription() const override { return "DivZeroCheckSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86_64);
};

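// Slow path for division/remainder when the divisor is -1: `idivl`/`idivq` raise a #DE fault for
// MIN_VALUE / -1, so the result is computed directly instead. The quotient is the two's-complement
// negation of the dividend (which wraps back to MIN_VALUE in the faulting case) and the remainder
// is zero; the dividend is presumably already in the result register when this path is entered.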
class DivRemMinusOneSlowPathX86_64 : public SlowPathCode {
 public:
  DivRemMinusOneSlowPathX86_64(HInstruction* at, Register reg, DataType::Type type, bool is_div)
      : SlowPathCode(at), cpu_reg_(CpuRegister(reg)), type_(type), is_div_(is_div) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    __ Bind(GetEntryLabel());
    if (type_ == DataType::Type::kInt32) {
      if (is_div_) {
        __ negl(cpu_reg_);
      } else {
        __ xorl(cpu_reg_, cpu_reg_);
      }

    } else {
      DCHECK_EQ(DataType::Type::kInt64, type_);
      if (is_div_) {
        __ negq(cpu_reg_);
      } else {
        __ xorl(cpu_reg_, cpu_reg_);
      }
    }
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "DivRemMinusOneSlowPathX86_64"; }

 private:
  const CpuRegister cpu_reg_;
  const DataType::Type type_;
  const bool is_div_;
  DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86_64);
};

class SuspendCheckSlowPathX86_64 : public SlowPathCode {
 public:
  SuspendCheckSlowPathX86_64(HSuspendCheck* instruction, HBasicBlock* successor)
      : SlowPathCode(instruction), successor_(successor) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);  // Only saves full width XMM for SIMD.
    x86_64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickTestSuspend, void, void>();
    RestoreLiveRegisters(codegen, locations);  // Only restores full width XMM for SIMD.
    if (successor_ == nullptr) {
      __ jmp(GetReturnLabel());
    } else {
      __ jmp(x86_64_codegen->GetLabelOf(successor_));
    }
  }

  Label* GetReturnLabel() {
    DCHECK(successor_ == nullptr);
    return &return_label_;
  }

  HBasicBlock* GetSuccessor() const {
    return successor_;
  }

  const char* GetDescription() const override { return "SuspendCheckSlowPathX86_64"; }

 private:
  HBasicBlock* const successor_;
  Label return_label_;

  DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86_64);
};

class BoundsCheckSlowPathX86_64 : public SlowPathCode {
 public:
  explicit BoundsCheckSlowPathX86_64(HBoundsCheck* instruction)
    : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    if (instruction_->CanThrowIntoCatchBlock()) {
      // Live registers will be restored in the catch block if caught.
      SaveLiveRegisters(codegen, instruction_->GetLocations());
    }
    // Are we using an array length from memory?
    HInstruction* array_length = instruction_->InputAt(1);
    Location length_loc = locations->InAt(1);
    InvokeRuntimeCallingConvention calling_convention;
    if (array_length->IsArrayLength() && array_length->IsEmittedAtUseSite()) {
      // Load the array length into our temporary.
      HArrayLength* length = array_length->AsArrayLength();
      uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(length);
      Location array_loc = array_length->GetLocations()->InAt(0);
      Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
      length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
      // Check for conflicts with index.
      if (length_loc.Equals(locations->InAt(0))) {
        // We know we aren't using parameter 2.
        length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2));
      }
      __ movl(length_loc.AsRegister<CpuRegister>(), array_len);
      if (mirror::kUseStringCompression && length->IsStringLength()) {
        __ shrl(length_loc.AsRegister<CpuRegister>(), Immediate(1));
      }
    }

    // We're moving two locations to locations that could overlap, so we need a parallel
    // move resolver.
    codegen->EmitParallelMoves(
        locations->InAt(0),
        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
        DataType::Type::kInt32,
        length_loc,
        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
        DataType::Type::kInt32);
    QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
        ? kQuickThrowStringBounds
        : kQuickThrowArrayBounds;
    x86_64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
    CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
  }

  bool IsFatal() const override { return true; }

  const char* GetDescription() const override { return "BoundsCheckSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86_64);
};

class LoadClassSlowPathX86_64 : public SlowPathCode {
 public:
  LoadClassSlowPathX86_64(HLoadClass* cls, HInstruction* at)
      : SlowPathCode(at), cls_(cls) {
    DCHECK(at->IsLoadClass() || at->IsClinitCheck());
    DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    Location out = locations->Out();
    const uint32_t dex_pc = instruction_->GetDexPc();
    bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
    bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();

    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    // Custom calling convention: RAX serves as both input and output.
    if (must_resolve_type) {
      DCHECK(IsSameDexFile(cls_->GetDexFile(), x86_64_codegen->GetGraph()->GetDexFile()));
      dex::TypeIndex type_index = cls_->GetTypeIndex();
      __ movl(CpuRegister(RAX), Immediate(type_index.index_));
      x86_64_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
      CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
      // If we also must_do_clinit, the resolved type is now in the correct register.
    } else {
      DCHECK(must_do_clinit);
      Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
      x86_64_codegen->Move(Location::RegisterLocation(RAX), source);
    }
    if (must_do_clinit) {
      x86_64_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
      CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
    }

    // Move the class to the desired location.
    if (out.IsValid()) {
      DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      x86_64_codegen->Move(out, Location::RegisterLocation(RAX));
    }

    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "LoadClassSlowPathX86_64"; }

 private:
  // The class this slow path will load.
  HLoadClass* const cls_;

  DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86_64);
};

class LoadStringSlowPathX86_64 : public SlowPathCode {
 public:
  explicit LoadStringSlowPathX86_64(HLoadString* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));

    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
    // Custom calling convention: RAX serves as both input and output.
    __ movl(CpuRegister(RAX), Immediate(string_index.index_));
    x86_64_codegen->InvokeRuntime(kQuickResolveString,
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
    CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
    x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
    RestoreLiveRegisters(codegen, locations);

    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "LoadStringSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86_64);
};

class TypeCheckSlowPathX86_64 : public SlowPathCode {
 public:
  TypeCheckSlowPathX86_64(HInstruction* instruction, bool is_fatal)
      : SlowPathCode(instruction), is_fatal_(is_fatal) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    uint32_t dex_pc = instruction_->GetDexPc();
    DCHECK(instruction_->IsCheckCast()
           || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));

    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());

    if (kPoisonHeapReferences &&
        instruction_->IsCheckCast() &&
        instruction_->AsCheckCast()->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) {
      // First, unpoison the `cls` reference that was poisoned for direct memory comparison.
      __ UnpoisonHeapReference(locations->InAt(1).AsRegister<CpuRegister>());
    }

    if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
      SaveLiveRegisters(codegen, locations);
    }

    // We're moving two locations to locations that could overlap, so we need a parallel
    // move resolver.
    InvokeRuntimeCallingConvention calling_convention;
    codegen->EmitParallelMoves(locations->InAt(0),
                               Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
                               DataType::Type::kReference,
                               locations->InAt(1),
                               Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
                               DataType::Type::kReference);
    if (instruction_->IsInstanceOf()) {
      x86_64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
      CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
    } else {
      DCHECK(instruction_->IsCheckCast());
      x86_64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
      CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
    }

    if (!is_fatal_) {
      if (instruction_->IsInstanceOf()) {
        x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
      }

      RestoreLiveRegisters(codegen, locations);
      __ jmp(GetExitLabel());
    }
  }

  const char* GetDescription() const override { return "TypeCheckSlowPathX86_64"; }

  bool IsFatal() const override { return is_fatal_; }

 private:
  const bool is_fatal_;

  DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86_64);
};

class DeoptimizationSlowPathX86_64 : public SlowPathCode {
 public:
  explicit DeoptimizationSlowPathX86_64(HDeoptimize* instruction)
      : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    LocationSummary* locations = instruction_->GetLocations();
    SaveLiveRegisters(codegen, locations);
    InvokeRuntimeCallingConvention calling_convention;
    x86_64_codegen->Load32BitValue(
        CpuRegister(calling_convention.GetRegisterAt(0)),
        static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
    x86_64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
  }

  const char* GetDescription() const override { return "DeoptimizationSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64);
};

class ArraySetSlowPathX86_64 : public SlowPathCode {
 public:
  explicit ArraySetSlowPathX86_64(HInstruction* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
    parallel_move.AddMove(
        locations->InAt(0),
        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
        DataType::Type::kReference,
        nullptr);
    parallel_move.AddMove(
        locations->InAt(1),
        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
        DataType::Type::kInt32,
        nullptr);
    parallel_move.AddMove(
        locations->InAt(2),
        Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
        DataType::Type::kReference,
        nullptr);
    codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);

    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    x86_64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "ArraySetSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64);
};

// Slow path marking an object reference `ref` during a read
// barrier. The field `obj.field` in the object `obj` holding this
// reference does not get updated by this slow path after marking (see
// ReadBarrierMarkAndUpdateFieldSlowPathX86_64 below for that).
//
// This means that after the execution of this slow path, `ref` will
// always be up-to-date, but `obj.field` may not; i.e., after the
// flip, `ref` will be a to-space reference, but `obj.field` will
// probably still be a from-space reference (unless it gets updated by
// another thread, or if another thread installed another object
// reference (different from `ref`) in `obj.field`).
class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
 public:
  ReadBarrierMarkSlowPathX86_64(HInstruction* instruction,
                                Location ref,
                                bool unpoison_ref_before_marking)
      : SlowPathCode(instruction),
        ref_(ref),
        unpoison_ref_before_marking_(unpoison_ref_before_marking) {
    DCHECK(kEmitCompilerReadBarrier);
  }

  const char* GetDescription() const override { return "ReadBarrierMarkSlowPathX86_64"; }

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
    Register ref_reg = ref_cpu_reg.AsRegister();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
    DCHECK(instruction_->IsInstanceFieldGet() ||
           instruction_->IsStaticFieldGet() ||
           instruction_->IsArrayGet() ||
           instruction_->IsArraySet() ||
           instruction_->IsLoadClass() ||
           instruction_->IsLoadString() ||
           instruction_->IsInstanceOf() ||
           instruction_->IsCheckCast() ||
           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
           (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier marking slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    if (unpoison_ref_before_marking_) {
      // Object* ref = ref_addr->AsMirrorPtr()
      __ MaybeUnpoisonHeapReference(ref_cpu_reg);
    }
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    DCHECK_NE(ref_reg, RSP);
    DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
    // "Compact" slow path, saving two moves.
    //
    // Instead of using the standard runtime calling convention (input
    // in RDI and output in RAX):
    //
    //   RDI <- ref
    //   RAX <- ReadBarrierMark(RDI)
    //   ref <- RAX
    //
    // we just use rX (the register containing `ref`) as input and output
    // of a dedicated entrypoint:
    //
    //   rX <- ReadBarrierMarkRegX(rX)
    //
    int32_t entry_point_offset =
        Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
    // This runtime call does not require a stack map.
    x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    __ jmp(GetExitLabel());
  }

 private:
  // The location (register) of the marked object reference.
  const Location ref_;
  // Should the reference in `ref_` be unpoisoned prior to marking it?
  const bool unpoison_ref_before_marking_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64);
};

// Slow path marking an object reference `ref` during a read barrier,
// and if needed, atomically updating the field `obj.field` in the
// object `obj` holding this reference after marking (contrary to
// ReadBarrierMarkSlowPathX86_64 above, which never tries to update
// `obj.field`).
//
// This means that after the execution of this slow path, both `ref`
// and `obj.field` will be up-to-date; i.e., after the flip, both will
// hold the same to-space reference (unless another thread installed
// another object reference (different from `ref`) in `obj.field`).
class ReadBarrierMarkAndUpdateFieldSlowPathX86_64 : public SlowPathCode {
 public:
  ReadBarrierMarkAndUpdateFieldSlowPathX86_64(HInstruction* instruction,
                                              Location ref,
                                              CpuRegister obj,
                                              const Address& field_addr,
                                              bool unpoison_ref_before_marking,
                                              CpuRegister temp1,
                                              CpuRegister temp2)
      : SlowPathCode(instruction),
        ref_(ref),
        obj_(obj),
        field_addr_(field_addr),
        unpoison_ref_before_marking_(unpoison_ref_before_marking),
        temp1_(temp1),
        temp2_(temp2) {
    DCHECK(kEmitCompilerReadBarrier);
  }

  const char* GetDescription() const override {
    return "ReadBarrierMarkAndUpdateFieldSlowPathX86_64";
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
    Register ref_reg = ref_cpu_reg.AsRegister();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
    // This slow path is only used by the UnsafeCASObject intrinsic.
    DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier marking and field updating slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);

    __ Bind(GetEntryLabel());
    if (unpoison_ref_before_marking_) {
      // Object* ref = ref_addr->AsMirrorPtr()
      __ MaybeUnpoisonHeapReference(ref_cpu_reg);
    }

    // Save the old (unpoisoned) reference.
    __ movl(temp1_, ref_cpu_reg);

    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    DCHECK_NE(ref_reg, RSP);
    DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
    // "Compact" slow path, saving two moves.
    //
    // Instead of using the standard runtime calling convention (input
    // in RDI and output in RAX):
    //
    //   RDI <- ref
    //   RAX <- ReadBarrierMark(RDI)
    //   ref <- RAX
    //
    // we just use rX (the register containing `ref`) as input and output
    // of a dedicated entrypoint:
    //
    //   rX <- ReadBarrierMarkRegX(rX)
    //
    int32_t entry_point_offset =
        Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
    // This runtime call does not require a stack map.
    x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);

    // If the new reference is different from the old reference,
    // update the field in the holder (`*field_addr`).
    //
    // Note that this field could also hold a different object, if
    // another thread had concurrently changed it. In that case, the
    // LOCK CMPXCHGL instruction in the compare-and-set (CAS)
    // operation below would abort the CAS, leaving the field as-is.
    NearLabel done;
    __ cmpl(temp1_, ref_cpu_reg);
    __ j(kEqual, &done);

    // Update the holder's field atomically.  This may fail if the
    // mutator updates the field before us, but that's OK.  This is achieved
    // using a strong compare-and-set (CAS) operation with relaxed
    // memory synchronization ordering, where the expected value is
    // the old reference and the desired value is the new reference.
    // This operation is implemented with a 32-bit LOCK CMPXCHG
    // instruction, which requires the expected value (the old
    // reference) to be in EAX.  Save RAX beforehand, and move the
    // expected value (stored in `temp1_`) into EAX.
    __ movq(temp2_, CpuRegister(RAX));
    __ movl(CpuRegister(RAX), temp1_);

    // Convenience aliases.
    CpuRegister base = obj_;
    CpuRegister expected = CpuRegister(RAX);
    CpuRegister value = ref_cpu_reg;

    bool base_equals_value = (base.AsRegister() == value.AsRegister());
    Register value_reg = ref_reg;
    if (kPoisonHeapReferences) {
      if (base_equals_value) {
        // If `base` and `value` are the same register location, move
        // `value_reg` to a temporary register.  This way, poisoning
        // `value_reg` won't invalidate `base`.
        value_reg = temp1_.AsRegister();
        __ movl(CpuRegister(value_reg), base);
      }

      // Check that the register allocator did not assign the location
      // of `expected` (RAX) to `value` nor to `base`, so that heap
      // poisoning (when enabled) works as intended below.
      // - If `value` were equal to `expected`, both references would
      //   be poisoned twice, meaning they would not be poisoned at
      //   all, as heap poisoning uses address negation.
      // - If `base` were equal to `expected`, poisoning `expected`
      //   would invalidate `base`.
      DCHECK_NE(value_reg, expected.AsRegister());
      DCHECK_NE(base.AsRegister(), expected.AsRegister());

      __ PoisonHeapReference(expected);
      __ PoisonHeapReference(CpuRegister(value_reg));
    }

    __ LockCmpxchgl(field_addr_, CpuRegister(value_reg));

    // If heap poisoning is enabled, we need to unpoison the values
    // that were poisoned earlier.
    if (kPoisonHeapReferences) {
      if (base_equals_value) {
        // `value_reg` has been moved to a temporary register, no need
        // to unpoison it.
      } else {
        __ UnpoisonHeapReference(CpuRegister(value_reg));
      }
      // No need to unpoison `expected` (RAX), as it will be overwritten below.
    }

    // Restore RAX.
    __ movq(CpuRegister(RAX), temp2_);

    __ Bind(&done);
    __ jmp(GetExitLabel());
  }

 private:
  // The location (register) of the marked object reference.
  const Location ref_;
  // The register containing the object holding the marked object reference field.
  const CpuRegister obj_;
  // The address of the marked reference field.  The base of this address must be `obj_`.
  const Address field_addr_;

  // Should the reference in `ref_` be unpoisoned prior to marking it?
  const bool unpoison_ref_before_marking_;

  const CpuRegister temp1_;
  const CpuRegister temp2_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86_64);
};

// Slow path generating a read barrier for a heap reference.
class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
 public:
  ReadBarrierForHeapReferenceSlowPathX86_64(HInstruction* instruction,
                                            Location out,
                                            Location ref,
                                            Location obj,
                                            uint32_t offset,
                                            Location index)
      : SlowPathCode(instruction),
        out_(out),
        ref_(ref),
        obj_(obj),
        offset_(offset),
        index_(index) {
    DCHECK(kEmitCompilerReadBarrier);
    // If `obj` is equal to `out` or `ref`, it means the initial
    // object has been overwritten by (or after) the heap object
    // reference load to be instrumented, e.g.:
    //
    //   __ movl(out, Address(out, offset));
    //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
    //
    // In that case, we have lost the information about the original
    // object, and the emitted read barrier cannot work properly.
    DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
    DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    LocationSummary* locations = instruction_->GetLocations();
    CpuRegister reg_out = out_.AsRegister<CpuRegister>();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_;
    DCHECK(instruction_->IsInstanceFieldGet() ||
           instruction_->IsStaticFieldGet() ||
           instruction_->IsArrayGet() ||
           instruction_->IsInstanceOf() ||
           instruction_->IsCheckCast() ||
           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier for heap reference slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    // We may have to change the index's value, but as `index_` is a
    // constant member (like other "inputs" of this slow path),
    // introduce a copy of it, `index`.
    Location index = index_;
    if (index_.IsValid()) {
      // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
      if (instruction_->IsArrayGet()) {
        // Compute real offset and store it in index_.
        Register index_reg = index_.AsRegister<CpuRegister>().AsRegister();
        DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
        if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
          // We are about to change the value of `index_reg` (see the
          // calls to art::x86_64::X86_64Assembler::shll and
          // art::x86_64::X86_64Assembler::AddImmediate below), but it
          // has not been saved by the previous call to
          // art::SlowPathCode::SaveLiveRegisters, as it is a
          // callee-save register --
          // art::SlowPathCode::SaveLiveRegisters does not consider
          // callee-save registers, as it has been designed with the
          // assumption that callee-save registers are supposed to be
          // handled by the called function.  So, as a callee-save
          // register, `index_reg` _would_ eventually be saved onto
          // the stack, but it would be too late: we would have
          // changed its value earlier.  Therefore, we manually save
          // it here into another freely available register,
          // `free_reg`, chosen of course among the caller-save
          // registers (as a callee-save `free_reg` register would
          // exhibit the same problem).
          //
          // Note we could have requested a temporary register from
          // the register allocator instead; but we prefer not to, as
          // this is a slow path, and we know we can find a
          // caller-save register that is available.
          Register free_reg = FindAvailableCallerSaveRegister(codegen).AsRegister();
          __ movl(CpuRegister(free_reg), CpuRegister(index_reg));
          index_reg = free_reg;
          index = Location::RegisterLocation(index_reg);
        } else {
          // The initial register stored in `index_` has already been
          // saved in the call to art::SlowPathCode::SaveLiveRegisters
          // (as it is not a callee-save register), so we can freely
          // use it.
        }
        // Shifting the index value contained in `index_reg` by the
        // scale factor (2) cannot overflow in practice, as the
        // runtime is unable to allocate object arrays with a size
        // larger than 2^26 - 1 (that is, 2^28 - 4 bytes).
        __ shll(CpuRegister(index_reg), Immediate(TIMES_4));
        static_assert(
            sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
            "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
        __ AddImmediate(CpuRegister(index_reg), Immediate(offset_));
      } else {
        // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
        // intrinsics, `index_` is not shifted by a scale factor of 2
        // (as in the case of ArrayGet), as it is actually an offset
        // to an object field within an object.
        DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
        DCHECK(instruction_->GetLocations()->Intrinsified());
        DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
               (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
            << instruction_->AsInvoke()->GetIntrinsic();
        DCHECK_EQ(offset_, 0U);
        DCHECK(index_.IsRegister());
      }
    }

    // We're moving two or three locations to locations that could
    // overlap, so we need a parallel move resolver.
    InvokeRuntimeCallingConvention calling_convention;
    HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
    parallel_move.AddMove(ref_,
                          Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
                          DataType::Type::kReference,
                          nullptr);
    parallel_move.AddMove(obj_,
                          Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
                          DataType::Type::kReference,
                          nullptr);
    if (index.IsValid()) {
      parallel_move.AddMove(index,
                            Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
                            DataType::Type::kInt32,
                            nullptr);
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
    } else {
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
      __ movl(CpuRegister(calling_convention.GetRegisterAt(2)), Immediate(offset_));
    }
    x86_64_codegen->InvokeRuntime(kQuickReadBarrierSlow,
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
    CheckEntrypointTypes<
        kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
    x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));

    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override {
    return "ReadBarrierForHeapReferenceSlowPathX86_64";
  }

 private:
  CpuRegister FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
    size_t ref = static_cast<int>(ref_.AsRegister<CpuRegister>().AsRegister());
    size_t obj = static_cast<int>(obj_.AsRegister<CpuRegister>().AsRegister());
    for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
      if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
        return static_cast<CpuRegister>(i);
      }
    }
    // We shall never fail to find a free caller-save register, as
    // there are more than two core caller-save registers on x86-64
    // (meaning it is possible to find one which is different from
    // `ref` and `obj`).
    DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
    LOG(FATAL) << "Could not find a free caller-save register";
    UNREACHABLE();
  }

  const Location out_;
  const Location ref_;
  const Location obj_;
  const uint32_t offset_;
  // An additional location containing an index to an array.
  // Only used for HArrayGet and the UnsafeGetObject &
  // UnsafeGetObjectVolatile intrinsics.
  const Location index_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86_64);
};

// Slow path generating a read barrier for a GC root.
class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode {
 public:
  ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root)
      : SlowPathCode(instruction), out_(out), root_(root) {
    DCHECK(kEmitCompilerReadBarrier);
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
        << "Unexpected instruction in read barrier for GC root slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
    x86_64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
    CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
    x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));

    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "ReadBarrierForRootSlowPathX86_64"; }

 private:
  const Location out_;
  const Location root_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86_64);
};

#undef __
// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86_64Assembler*>(GetAssembler())->  // NOLINT

inline Condition X86_64IntegerCondition(IfCondition cond) {
  switch (cond) {
    case kCondEQ: return kEqual;
    case kCondNE: return kNotEqual;
    case kCondLT: return kLess;
    case kCondLE: return kLessEqual;
    case kCondGT: return kGreater;
    case kCondGE: return kGreaterEqual;
    case kCondB:  return kBelow;
    case kCondBE: return kBelowEqual;
    case kCondA:  return kAbove;
    case kCondAE: return kAboveEqual;
  }
  LOG(FATAL) << "Unreachable";
  UNREACHABLE();
}

// Maps FP condition to x86_64 name.
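// Informational note: UCOMISS/UCOMISD set CF/ZF/PF the way an unsigned integer comparison would,
// which is why the floating-point conditions map to the below/above family rather than
// less/greater.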
inline Condition X86_64FPCondition(IfCondition cond) {
  switch (cond) {
    case kCondEQ: return kEqual;
    case kCondNE: return kNotEqual;
    case kCondLT: return kBelow;
    case kCondLE: return kBelowEqual;
    case kCondGT: return kAbove;
    case kCondGE: return kAboveEqual;
    default:      break;  // should not happen
  }
  LOG(FATAL) << "Unreachable";
  UNREACHABLE();
}

HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStaticOrDirectDispatch(
      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
      ArtMethod* method ATTRIBUTE_UNUSED) {
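  // x86-64 places no restrictions on the dispatch: every method load kind and code pointer
  // location is supported, so the desired dispatch info is returned unchanged.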
  return desired_dispatch_info;
}

void CodeGeneratorX86_64::GenerateStaticOrDirectCall(
    HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
  // All registers are assumed to be correctly set up.

  Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
  switch (invoke->GetMethodLoadKind()) {
    case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
      // temp = thread->string_init_entrypoint
      uint32_t offset =
          GetThreadOffset<kX86_64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
      __ gs()->movq(temp.AsRegister<CpuRegister>(), Address::Absolute(offset, /* no_rip= */ true));
      break;
    }
    case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
      callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
      break;
    case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative:
      DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
      __ leal(temp.AsRegister<CpuRegister>(),
              Address::Absolute(kDummy32BitOffset, /* no_rip= */ false));
      RecordBootImageMethodPatch(invoke);
      break;
    case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: {
      // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load.
      __ movl(temp.AsRegister<CpuRegister>(),
              Address::Absolute(kDummy32BitOffset, /* no_rip= */ false));
      RecordBootImageRelRoPatch(GetBootImageOffset(invoke));
      break;
    }
    case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
      __ movq(temp.AsRegister<CpuRegister>(),
              Address::Absolute(kDummy32BitOffset, /* no_rip= */ false));
      RecordMethodBssEntryPatch(invoke);
      // No need for memory fence, thanks to the x86-64 memory model.
      break;
    }
    case HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress:
      Load64BitValue(temp.AsRegister<CpuRegister>(), invoke->GetMethodAddress());
      break;
    case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: {
      GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
      return;  // No code pointer retrieval; the runtime performs the call directly.
    }
  }

  switch (invoke->GetCodePtrLocation()) {
    case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
      __ call(&frame_entry_label_);
      break;
    case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
      // (callee_method + offset_of_quick_compiled_code)()
      __ call(Address(callee_method.AsRegister<CpuRegister>(),
                      ArtMethod::EntryPointFromQuickCompiledCodeOffset(
                          kX86_64PointerSize).SizeValue()));
      break;
  }
  RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);

  DCHECK(!IsLeafMethod());
}

void CodeGeneratorX86_64::GenerateVirtualCall(
    HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
  CpuRegister temp = temp_in.AsRegister<CpuRegister>();
  size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
      invoke->GetVTableIndex(), kX86_64PointerSize).SizeValue();

  // Use the calling convention instead of the location of the receiver, as
  // intrinsics may have put the receiver in a different register. In the intrinsics
  // slow path, the arguments have been moved to the right place, so here we are
  // guaranteed that the receiver is the first register of the calling convention.
  InvokeDexCallingConvention calling_convention;
  Register receiver = calling_convention.GetRegisterAt(0);

  size_t class_offset = mirror::Object::ClassOffset().SizeValue();
  // /* HeapReference<Class> */ temp = receiver->klass_
  __ movl(temp, Address(CpuRegister(receiver), class_offset));
  MaybeRecordImplicitNullCheck(invoke);
  // Instead of simply (possibly) unpoisoning `temp` here, we should
  // emit a read barrier for the previous class reference load.
  // However this is not required in practice, as this is an
  // intermediate/temporary reference and because the current
  // concurrent copying collector keeps the from-space memory
  // intact/accessible until the end of the marking phase (the
  // concurrent copying collector may not do so in the future).
  __ MaybeUnpoisonHeapReference(temp);

  MaybeGenerateInlineCacheCheck(invoke, temp);

  // temp = temp->GetMethodAt(method_offset);
  __ movq(temp, Address(temp, method_offset));
  // call temp->GetEntryPoint();
  __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(
      kX86_64PointerSize).SizeValue()));
  RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
}

void CodeGeneratorX86_64::RecordBootImageIntrinsicPatch(uint32_t intrinsic_data) {
  boot_image_other_patches_.emplace_back(/* target_dex_file= */ nullptr, intrinsic_data);
  __ Bind(&boot_image_other_patches_.back().label);
}

void CodeGeneratorX86_64::RecordBootImageRelRoPatch(uint32_t boot_image_offset) {
  boot_image_other_patches_.emplace_back(/* target_dex_file= */ nullptr, boot_image_offset);
  __ Bind(&boot_image_other_patches_.back().label);
}

void CodeGeneratorX86_64::RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke) {
  boot_image_method_patches_.emplace_back(
      invoke->GetTargetMethod().dex_file, invoke->GetTargetMethod().index);
  __ Bind(&boot_image_method_patches_.back().label);
}

void CodeGeneratorX86_64::RecordMethodBssEntryPatch(HInvokeStaticOrDirect* invoke) {
  method_bss_entry_patches_.emplace_back(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex());
  __ Bind(&method_bss_entry_patches_.back().label);
}

void CodeGeneratorX86_64::RecordBootImageTypePatch(HLoadClass* load_class) {
  boot_image_type_patches_.emplace_back(
      &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
  __ Bind(&boot_image_type_patches_.back().label);
}

Label* CodeGeneratorX86_64::NewTypeBssEntryPatch(HLoadClass* load_class) {
  type_bss_entry_patches_.emplace_back(
      &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
  return &type_bss_entry_patches_.back().label;
}

void CodeGeneratorX86_64::RecordBootImageStringPatch(HLoadString* load_string) {
  boot_image_string_patches_.emplace_back(
      &load_string->GetDexFile(), load_string->GetStringIndex().index_);
  __ Bind(&boot_image_string_patches_.back().label);
}

Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) {
  string_bss_entry_patches_.emplace_back(
      &load_string->GetDexFile(), load_string->GetStringIndex().index_);
  return &string_bss_entry_patches_.back().label;
}

void CodeGeneratorX86_64::LoadBootImageAddress(CpuRegister reg, uint32_t boot_image_reference) {
  if (GetCompilerOptions().IsBootImage()) {
    __ leal(reg, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false));
    RecordBootImageIntrinsicPatch(boot_image_reference);
  } else if (GetCompilerOptions().GetCompilePic()) {
    __ movl(reg, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false));
    RecordBootImageRelRoPatch(boot_image_reference);
  } else {
    DCHECK(Runtime::Current()->UseJitCompilation());
    gc::Heap* heap = Runtime::Current()->GetHeap();
    DCHECK(!heap->GetBootImageSpaces().empty());
    const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference;
    __ movl(reg, Immediate(dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(address))));
  }
}

void CodeGeneratorX86_64::AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke,
                                                       uint32_t boot_image_offset) {
  DCHECK(invoke->IsStatic());
  InvokeRuntimeCallingConvention calling_convention;
  CpuRegister argument = CpuRegister(calling_convention.GetRegisterAt(0));
  if (GetCompilerOptions().IsBootImage()) {
    DCHECK_EQ(boot_image_offset, IntrinsicVisitor::IntegerValueOfInfo::kInvalidReference);
    // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
    __ leal(argument,
            Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false));
    MethodReference target_method = invoke->GetTargetMethod();
    dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
    boot_image_type_patches_.emplace_back(target_method.dex_file, type_idx.index_);
    __ Bind(&boot_image_type_patches_.back().label);
  } else {
    LoadBootImageAddress(argument, boot_image_offset);
  }
  InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
  CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
}
1165 
1166 // The label points to the end of the "movl" (or another) instruction, but the literal offset
1167 // for the patch needs to point to the embedded constant, which occupies the last 4 bytes.
1168 constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
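// For example, a PC-relative "movl reg, [rip + disp32]" is bound with its label at the end of the
// instruction; the disp32 to be patched occupies the last 4 bytes, so its literal offset is
// label.Position() - 4 (see EmitPcRelativeLinkerPatches below).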
1169 
1170 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
1171 inline void CodeGeneratorX86_64::EmitPcRelativeLinkerPatches(
1172     const ArenaDeque<PatchInfo<Label>>& infos,
1173     ArenaVector<linker::LinkerPatch>* linker_patches) {
1174   for (const PatchInfo<Label>& info : infos) {
1175     uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
1176     linker_patches->push_back(
1177         Factory(literal_offset, info.target_dex_file, info.label.Position(), info.offset_or_index));
1178   }
1179 }
1180 
1181 template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
1182 linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
1183                                      const DexFile* target_dex_file,
1184                                      uint32_t pc_insn_offset,
1185                                      uint32_t boot_image_offset) {
1186   DCHECK(target_dex_file == nullptr);  // Unused for these patches, should be null.
1187   return Factory(literal_offset, pc_insn_offset, boot_image_offset);
1188 }
1189 
1190 void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
1191   DCHECK(linker_patches->empty());
1192   size_t size =
1193       boot_image_method_patches_.size() +
1194       method_bss_entry_patches_.size() +
1195       boot_image_type_patches_.size() +
1196       type_bss_entry_patches_.size() +
1197       boot_image_string_patches_.size() +
1198       string_bss_entry_patches_.size() +
1199       boot_image_other_patches_.size();
1200   linker_patches->reserve(size);
1201   if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) {
1202     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
1203         boot_image_method_patches_, linker_patches);
1204     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
1205         boot_image_type_patches_, linker_patches);
1206     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
1207         boot_image_string_patches_, linker_patches);
1208   } else {
1209     DCHECK(boot_image_method_patches_.empty());
1210     DCHECK(boot_image_type_patches_.empty());
1211     DCHECK(boot_image_string_patches_.empty());
1212   }
1213   if (GetCompilerOptions().IsBootImage()) {
1214     EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
1215         boot_image_other_patches_, linker_patches);
1216   } else {
1217     EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>(
1218         boot_image_other_patches_, linker_patches);
1219   }
1220   EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
1221       method_bss_entry_patches_, linker_patches);
1222   EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
1223       type_bss_entry_patches_, linker_patches);
1224   EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
1225       string_bss_entry_patches_, linker_patches);
1226   DCHECK_EQ(size, linker_patches->size());
1227 }
1228 
1229 void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const {
1230   stream << Register(reg);
1231 }
1232 
1233 void CodeGeneratorX86_64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
1234   stream << FloatRegister(reg);
1235 }
1236 
1237 const X86_64InstructionSetFeatures& CodeGeneratorX86_64::GetInstructionSetFeatures() const {
1238   return *GetCompilerOptions().GetInstructionSetFeatures()->AsX86_64InstructionSetFeatures();
1239 }
1240 
1241 size_t CodeGeneratorX86_64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
1242   __ movq(Address(CpuRegister(RSP), stack_index), CpuRegister(reg_id));
1243   return kX86_64WordSize;
1244 }
1245 
1246 size_t CodeGeneratorX86_64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
1247   __ movq(CpuRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1248   return kX86_64WordSize;
1249 }
1250 
1251 size_t CodeGeneratorX86_64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1252   if (GetGraph()->HasSIMD()) {
1253     __ movups(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
1254   } else {
1255     __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
1256   }
1257   return GetSlowPathFPWidth();
1258 }
1259 
1260 size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1261   if (GetGraph()->HasSIMD()) {
1262     __ movups(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1263   } else {
1264     __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1265   }
1266   return GetSlowPathFPWidth();
1267 }
1268 
1269 void CodeGeneratorX86_64::InvokeRuntime(QuickEntrypointEnum entrypoint,
1270                                         HInstruction* instruction,
1271                                         uint32_t dex_pc,
1272                                         SlowPathCode* slow_path) {
1273   ValidateInvokeRuntime(entrypoint, instruction, slow_path);
1274   GenerateInvokeRuntime(GetThreadOffset<kX86_64PointerSize>(entrypoint).Int32Value());
1275   if (EntrypointRequiresStackMap(entrypoint)) {
1276     RecordPcInfo(instruction, dex_pc, slow_path);
1277   }
1278 }
1279 
1280 void CodeGeneratorX86_64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
1281                                                               HInstruction* instruction,
1282                                                               SlowPathCode* slow_path) {
1283   ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
1284   GenerateInvokeRuntime(entry_point_offset);
1285 }
1286 
1287 void CodeGeneratorX86_64::GenerateInvokeRuntime(int32_t entry_point_offset) {
1288   __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip= */ true));
1289 }
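
// Illustratively, the call emitted above is "call gs:[entry_point_offset]": on x86-64 the Thread*
// is reachable through the GS segment, so runtime entrypoints are invoked indirectly through the
// current thread's entrypoint table.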
1290 
1291 static constexpr int kNumberOfCpuRegisterPairs = 0;
1292 // Use a fake return address register to mimic Quick.
1293 static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1);
1294 CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
1295                                          const CompilerOptions& compiler_options,
1296                                          OptimizingCompilerStats* stats)
1297       : CodeGenerator(graph,
1298                       kNumberOfCpuRegisters,
1299                       kNumberOfFloatRegisters,
1300                       kNumberOfCpuRegisterPairs,
1301                       ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
1302                                           arraysize(kCoreCalleeSaves))
1303                           | (1 << kFakeReturnRegister),
1304                       ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves),
1305                                           arraysize(kFpuCalleeSaves)),
1306                       compiler_options,
1307                       stats),
1308         block_labels_(nullptr),
1309         location_builder_(graph, this),
1310         instruction_visitor_(graph, this),
1311         move_resolver_(graph->GetAllocator(), this),
1312         assembler_(graph->GetAllocator()),
1313         constant_area_start_(0),
1314         boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1315         method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1316         boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1317         type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1318         boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1319         string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1320         boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1321         jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1322         jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1323         fixups_to_jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
1324   AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
1325 }
1326 
1327 InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph,
1328                                                                CodeGeneratorX86_64* codegen)
1329       : InstructionCodeGenerator(graph, codegen),
1330         assembler_(codegen->GetAssembler()),
1331         codegen_(codegen) {}
1332 
1333 void CodeGeneratorX86_64::SetupBlockedRegisters() const {
1334   // Stack register is always reserved.
1335   blocked_core_registers_[RSP] = true;
1336 
1337   // Block the register used as TMP.
1338   blocked_core_registers_[TMP] = true;
1339 }
1340 
1341 static dwarf::Reg DWARFReg(Register reg) {
1342   return dwarf::Reg::X86_64Core(static_cast<int>(reg));
1343 }
1344 
1345 static dwarf::Reg DWARFReg(FloatRegister reg) {
1346   return dwarf::Reg::X86_64Fp(static_cast<int>(reg));
1347 }
1348 
1349 void CodeGeneratorX86_64::MaybeIncrementHotness(bool is_frame_entry) {
1350   if (GetCompilerOptions().CountHotnessInCompiledCode()) {
1351     NearLabel overflow;
1352     Register method = kMethodRegisterArgument;
1353     if (!is_frame_entry) {
1354       CHECK(RequiresCurrentMethod());
1355       method = TMP;
1356       __ movq(CpuRegister(method), Address(CpuRegister(RSP), kCurrentMethodStackOffset));
1357     }
1358     __ cmpw(Address(CpuRegister(method), ArtMethod::HotnessCountOffset().Int32Value()),
1359             Immediate(ArtMethod::MaxCounter()));
1360     __ j(kEqual, &overflow);
1361     __ addw(Address(CpuRegister(method), ArtMethod::HotnessCountOffset().Int32Value()),
1362             Immediate(1));
1363     __ Bind(&overflow);
1364   }
1365 
1366   if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
1367     ScopedObjectAccess soa(Thread::Current());
1368     ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize);
1369     if (info != nullptr) {
1370       uint64_t address = reinterpret_cast64<uint64_t>(info);
1371       NearLabel done;
1372       __ movq(CpuRegister(TMP), Immediate(address));
1373       __ addw(Address(CpuRegister(TMP), ProfilingInfo::BaselineHotnessCountOffset().Int32Value()),
1374               Immediate(1));
1375       __ j(kCarryClear, &done);
1376       if (HasEmptyFrame()) {
1377         CHECK(is_frame_entry);
1378         // Frame alignment, and the stub expects the method on the stack.
1379         __ pushq(CpuRegister(RDI));
1380         __ cfi().AdjustCFAOffset(kX86_64WordSize);
1381         __ cfi().RelOffset(DWARFReg(RDI), 0);
1382       } else if (!RequiresCurrentMethod()) {
1383         CHECK(is_frame_entry);
1384         __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset), CpuRegister(RDI));
1385       }
1386       GenerateInvokeRuntime(
1387           GetThreadOffset<kX86_64PointerSize>(kQuickCompileOptimized).Int32Value());
1388       if (HasEmptyFrame()) {
1389         __ popq(CpuRegister(RDI));
1390         __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize));
1391         __ cfi().Restore(DWARFReg(RDI));
1392       }
1393       __ Bind(&done);
1394     }
1395   }
1396 }
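
// Rough sketch of what the above emits (offsets and the TMP choice mirror the code; mnemonics
// are illustrative):
//
//   ; --CountHotnessInCompiledCode--
//   cmpw [method + hotness_offset], MaxCounter     ; saturate rather than wrap
//   je   overflow
//   addw [method + hotness_offset], 1
//  overflow:
//
//   ; --baseline compilation, JIT only--
//   movq TMP, <ProfilingInfo*>
//   addw [TMP + baseline_hotness_offset], 1
//   jnc  done                                      ; no wrap-around, nothing else to do
//   call gs:[<kQuickCompileOptimized offset>]      ; RDI pushed/stored first if needed, see above
//  done: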
1397 
1398 void CodeGeneratorX86_64::GenerateFrameEntry() {
1399   __ cfi().SetCurrentCFAOffset(kX86_64WordSize);  // return address
1400   __ Bind(&frame_entry_label_);
1401   bool skip_overflow_check = IsLeafMethod()
1402       && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
1403   DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
1404 
1405 
1406   if (!skip_overflow_check) {
1407     size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86_64);
1408     __ testq(CpuRegister(RAX), Address(CpuRegister(RSP), -static_cast<int32_t>(reserved_bytes)));
1409     RecordPcInfo(nullptr, 0);
1410   }
1411 
1412   if (!HasEmptyFrame()) {
1413     for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
1414       Register reg = kCoreCalleeSaves[i];
1415       if (allocated_registers_.ContainsCoreRegister(reg)) {
1416         __ pushq(CpuRegister(reg));
1417         __ cfi().AdjustCFAOffset(kX86_64WordSize);
1418         __ cfi().RelOffset(DWARFReg(reg), 0);
1419       }
1420     }
1421 
1422     int adjust = GetFrameSize() - GetCoreSpillSize();
1423     __ subq(CpuRegister(RSP), Immediate(adjust));
1424     __ cfi().AdjustCFAOffset(adjust);
1425     uint32_t xmm_spill_location = GetFpuSpillStart();
1426     size_t xmm_spill_slot_size = GetCalleePreservedFPWidth();
1427 
1428     for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
1429       if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
1430         int offset = xmm_spill_location + (xmm_spill_slot_size * i);
1431         __ movsd(Address(CpuRegister(RSP), offset), XmmRegister(kFpuCalleeSaves[i]));
1432         __ cfi().RelOffset(DWARFReg(kFpuCalleeSaves[i]), offset);
1433       }
1434     }
1435 
1436     // Save the current method if we need it. Note that we do not
1437     // do this in HCurrentMethod, as the instruction might have been removed
1438     // in the SSA graph.
1439     if (RequiresCurrentMethod()) {
1440       CHECK(!HasEmptyFrame());
1441       __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset),
1442               CpuRegister(kMethodRegisterArgument));
1443     }
1444 
1445     if (GetGraph()->HasShouldDeoptimizeFlag()) {
1446       CHECK(!HasEmptyFrame());
1447       // Initialize should_deoptimize flag to 0.
1448       __ movl(Address(CpuRegister(RSP), GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
1449     }
1450   }
1451 
1452   MaybeIncrementHotness(/* is_frame_entry= */ true);
1453 }
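
// Illustrative prologue for a small non-leaf method that keeps RBX and XMM12 live (actual
// registers, offsets and sizes depend on register allocation and the computed frame layout):
//
//   testq rax, [rsp - reserved_bytes]    ; implicit stack-overflow probe (faults if unmapped)
//   pushq rbx                            ; core callee-saves
//   subq  rsp, frame_size - core_spill_size
//   movsd [rsp + xmm_spill_offset], xmm12
//   movq  [rsp], rdi                     ; current ArtMethod* at the bottom of the frame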
1454 
1455 void CodeGeneratorX86_64::GenerateFrameExit() {
1456   __ cfi().RememberState();
1457   if (!HasEmptyFrame()) {
1458     uint32_t xmm_spill_location = GetFpuSpillStart();
1459     size_t xmm_spill_slot_size = GetCalleePreservedFPWidth();
1460     for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
1461       if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
1462         int offset = xmm_spill_location + (xmm_spill_slot_size * i);
1463         __ movsd(XmmRegister(kFpuCalleeSaves[i]), Address(CpuRegister(RSP), offset));
1464         __ cfi().Restore(DWARFReg(kFpuCalleeSaves[i]));
1465       }
1466     }
1467 
1468     int adjust = GetFrameSize() - GetCoreSpillSize();
1469     __ addq(CpuRegister(RSP), Immediate(adjust));
1470     __ cfi().AdjustCFAOffset(-adjust);
1471 
1472     for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
1473       Register reg = kCoreCalleeSaves[i];
1474       if (allocated_registers_.ContainsCoreRegister(reg)) {
1475         __ popq(CpuRegister(reg));
1476         __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize));
1477         __ cfi().Restore(DWARFReg(reg));
1478       }
1479     }
1480   }
1481   __ ret();
1482   __ cfi().RestoreState();
1483   __ cfi().DefCFAOffset(GetFrameSize());
1484 }
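
// The epilogue mirrors the prologue, e.g.:
//
//   movsd xmm12, [rsp + xmm_spill_offset]
//   addq  rsp, frame_size - core_spill_size
//   popq  rbx
//   ret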
1485 
1486 void CodeGeneratorX86_64::Bind(HBasicBlock* block) {
1487   __ Bind(GetLabelOf(block));
1488 }
1489 
1490 void CodeGeneratorX86_64::Move(Location destination, Location source) {
1491   if (source.Equals(destination)) {
1492     return;
1493   }
1494   if (destination.IsRegister()) {
1495     CpuRegister dest = destination.AsRegister<CpuRegister>();
1496     if (source.IsRegister()) {
1497       __ movq(dest, source.AsRegister<CpuRegister>());
1498     } else if (source.IsFpuRegister()) {
1499       __ movd(dest, source.AsFpuRegister<XmmRegister>());
1500     } else if (source.IsStackSlot()) {
1501       __ movl(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1502     } else if (source.IsConstant()) {
1503       HConstant* constant = source.GetConstant();
1504       if (constant->IsLongConstant()) {
1505         Load64BitValue(dest, constant->AsLongConstant()->GetValue());
1506       } else {
1507         Load32BitValue(dest, GetInt32ValueOf(constant));
1508       }
1509     } else {
1510       DCHECK(source.IsDoubleStackSlot());
1511       __ movq(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1512     }
1513   } else if (destination.IsFpuRegister()) {
1514     XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
1515     if (source.IsRegister()) {
1516       __ movd(dest, source.AsRegister<CpuRegister>());
1517     } else if (source.IsFpuRegister()) {
1518       __ movaps(dest, source.AsFpuRegister<XmmRegister>());
1519     } else if (source.IsConstant()) {
1520       HConstant* constant = source.GetConstant();
1521       int64_t value = CodeGenerator::GetInt64ValueOf(constant);
1522       if (constant->IsFloatConstant()) {
1523         Load32BitValue(dest, static_cast<int32_t>(value));
1524       } else {
1525         Load64BitValue(dest, value);
1526       }
1527     } else if (source.IsStackSlot()) {
1528       __ movss(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1529     } else {
1530       DCHECK(source.IsDoubleStackSlot());
1531       __ movsd(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1532     }
1533   } else if (destination.IsStackSlot()) {
1534     if (source.IsRegister()) {
1535       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
1536               source.AsRegister<CpuRegister>());
1537     } else if (source.IsFpuRegister()) {
1538       __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
1539                source.AsFpuRegister<XmmRegister>());
1540     } else if (source.IsConstant()) {
1541       HConstant* constant = source.GetConstant();
1542       int32_t value = GetInt32ValueOf(constant);
1543       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
1544     } else {
1545       DCHECK(source.IsStackSlot()) << source;
1546       __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
1547       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
1548     }
1549   } else {
1550     DCHECK(destination.IsDoubleStackSlot());
1551     if (source.IsRegister()) {
1552       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
1553               source.AsRegister<CpuRegister>());
1554     } else if (source.IsFpuRegister()) {
1555       __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
1556                source.AsFpuRegister<XmmRegister>());
1557     } else if (source.IsConstant()) {
1558       HConstant* constant = source.GetConstant();
1559       DCHECK(constant->IsLongConstant() || constant->IsDoubleConstant());
1560       int64_t value = GetInt64ValueOf(constant);
1561       Store64BitValueToStack(destination, value);
1562     } else {
1563       DCHECK(source.IsDoubleStackSlot());
1564       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
1565       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
1566     }
1567   }
1568 }
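
// A few representative lowerings of Move() above (illustrative):
//   register    <- register:      movq dst, src
//   FP register <- stack slot:    movss xmm, [rsp + offset]        (movsd for 64-bit slots)
//   stack slot  <- stack slot:    movl TMP, [rsp + src]; movl [rsp + dst], TMP
//   register    <- long constant: Load64BitValue(dest, value)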
1569 
1570 void CodeGeneratorX86_64::MoveConstant(Location location, int32_t value) {
1571   DCHECK(location.IsRegister());
1572   Load64BitValue(location.AsRegister<CpuRegister>(), static_cast<int64_t>(value));
1573 }
1574 
1575 void CodeGeneratorX86_64::MoveLocation(
1576     Location dst, Location src, DataType::Type dst_type ATTRIBUTE_UNUSED) {
1577   Move(dst, src);
1578 }
1579 
1580 void CodeGeneratorX86_64::AddLocationAsTemp(Location location, LocationSummary* locations) {
1581   if (location.IsRegister()) {
1582     locations->AddTemp(location);
1583   } else {
1584     UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
1585   }
1586 }
1587 
1588 void InstructionCodeGeneratorX86_64::HandleGoto(HInstruction* got, HBasicBlock* successor) {
1589   if (successor->IsExitBlock()) {
1590     DCHECK(got->GetPrevious()->AlwaysThrows());
1591     return;  // no code needed
1592   }
1593 
1594   HBasicBlock* block = got->GetBlock();
1595   HInstruction* previous = got->GetPrevious();
1596 
1597   HLoopInformation* info = block->GetLoopInformation();
1598   if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
1599     codegen_->MaybeIncrementHotness(/* is_frame_entry= */ false);
1600     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
1601     return;
1602   }
1603 
1604   if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
1605     GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
1606   }
1607   if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
1608     __ jmp(codegen_->GetLabelOf(successor));
1609   }
1610 }
1611 
1612 void LocationsBuilderX86_64::VisitGoto(HGoto* got) {
1613   got->SetLocations(nullptr);
1614 }
1615 
1616 void InstructionCodeGeneratorX86_64::VisitGoto(HGoto* got) {
1617   HandleGoto(got, got->GetSuccessor());
1618 }
1619 
1620 void LocationsBuilderX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
1621   try_boundary->SetLocations(nullptr);
1622 }
1623 
1624 void InstructionCodeGeneratorX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
1625   HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
1626   if (!successor->IsExitBlock()) {
1627     HandleGoto(try_boundary, successor);
1628   }
1629 }
1630 
1631 void LocationsBuilderX86_64::VisitExit(HExit* exit) {
1632   exit->SetLocations(nullptr);
1633 }
1634 
1635 void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
1636 }
1637 
1638 template<class LabelType>
1639 void InstructionCodeGeneratorX86_64::GenerateFPJumps(HCondition* cond,
1640                                                      LabelType* true_label,
1641                                                      LabelType* false_label) {
1642   if (cond->IsFPConditionTrueIfNaN()) {
1643     __ j(kUnordered, true_label);
1644   } else if (cond->IsFPConditionFalseIfNaN()) {
1645     __ j(kUnordered, false_label);
1646   }
1647   __ j(X86_64FPCondition(cond->GetCondition()), true_label);
1648 }
1649 
1650 void InstructionCodeGeneratorX86_64::GenerateCompareTest(HCondition* condition) {
1651   LocationSummary* locations = condition->GetLocations();
1652 
1653   Location left = locations->InAt(0);
1654   Location right = locations->InAt(1);
1655   DataType::Type type = condition->InputAt(0)->GetType();
1656   switch (type) {
1657     case DataType::Type::kBool:
1658     case DataType::Type::kUint8:
1659     case DataType::Type::kInt8:
1660     case DataType::Type::kUint16:
1661     case DataType::Type::kInt16:
1662     case DataType::Type::kInt32:
1663     case DataType::Type::kReference: {
1664       codegen_->GenerateIntCompare(left, right);
1665       break;
1666     }
1667     case DataType::Type::kInt64: {
1668       codegen_->GenerateLongCompare(left, right);
1669       break;
1670     }
1671     case DataType::Type::kFloat32: {
1672       if (right.IsFpuRegister()) {
1673         __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
1674       } else if (right.IsConstant()) {
1675         __ ucomiss(left.AsFpuRegister<XmmRegister>(),
1676                    codegen_->LiteralFloatAddress(
1677                      right.GetConstant()->AsFloatConstant()->GetValue()));
1678       } else {
1679         DCHECK(right.IsStackSlot());
1680         __ ucomiss(left.AsFpuRegister<XmmRegister>(),
1681                    Address(CpuRegister(RSP), right.GetStackIndex()));
1682       }
1683       break;
1684     }
1685     case DataType::Type::kFloat64: {
1686       if (right.IsFpuRegister()) {
1687         __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
1688       } else if (right.IsConstant()) {
1689         __ ucomisd(left.AsFpuRegister<XmmRegister>(),
1690                    codegen_->LiteralDoubleAddress(
1691                      right.GetConstant()->AsDoubleConstant()->GetValue()));
1692       } else {
1693         DCHECK(right.IsDoubleStackSlot());
1694         __ ucomisd(left.AsFpuRegister<XmmRegister>(),
1695                    Address(CpuRegister(RSP), right.GetStackIndex()));
1696       }
1697       break;
1698     }
1699     default:
1700       LOG(FATAL) << "Unexpected condition type " << type;
1701   }
1702 }
1703 
1704 template<class LabelType>
1705 void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* condition,
1706                                                                   LabelType* true_target_in,
1707                                                                   LabelType* false_target_in) {
1708   // Generated branching requires both targets to be explicit. If either of the
1709   // targets is nullptr (fallthrough), use and bind `fallthrough_target` instead.
1710   LabelType fallthrough_target;
1711   LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
1712   LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
1713 
1714   // Generate the comparison to set the CC.
1715   GenerateCompareTest(condition);
1716 
1717   // Now generate the correct jump(s).
1718   DataType::Type type = condition->InputAt(0)->GetType();
1719   switch (type) {
1720     case DataType::Type::kInt64: {
1721       __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
1722       break;
1723     }
1724     case DataType::Type::kFloat32: {
1725       GenerateFPJumps(condition, true_target, false_target);
1726       break;
1727     }
1728     case DataType::Type::kFloat64: {
1729       GenerateFPJumps(condition, true_target, false_target);
1730       break;
1731     }
1732     default:
1733       LOG(FATAL) << "Unexpected condition type " << type;
1734   }
1735 
1736   if (false_target != &fallthrough_target) {
1737     __ jmp(false_target);
1738   }
1739 
1740   if (fallthrough_target.IsLinked()) {
1741     __ Bind(&fallthrough_target);
1742   }
1743 }
1744 
1745 static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) {
1746   // Moves may affect the eflags register (move zero uses xorl), so the EFLAGS
1747   // are set only strictly before `branch`. We can't use the eflags on long
1748   // conditions if they are materialized due to the complex branching.
1749   return cond->IsCondition() &&
1750          cond->GetNext() == branch &&
1751          !DataType::IsFloatingPointType(cond->InputAt(0)->GetType());
1752 }
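
// Example: for "cond = HLess(a, b)" immediately followed by "HIf(cond)", the flags produced when
// materializing `cond` are still valid, so the branch can use them directly. If any instruction
// (e.g. a register move, which may use xorl) is scheduled between the two, the flags may be
// clobbered and the materialized value is re-tested against zero instead.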
1753 
1754 template<class LabelType>
1755 void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruction,
1756                                                            size_t condition_input_index,
1757                                                            LabelType* true_target,
1758                                                            LabelType* false_target) {
1759   HInstruction* cond = instruction->InputAt(condition_input_index);
1760 
1761   if (true_target == nullptr && false_target == nullptr) {
1762     // Nothing to do. The code always falls through.
1763     return;
1764   } else if (cond->IsIntConstant()) {
1765     // Constant condition, statically compared against "true" (integer value 1).
1766     if (cond->AsIntConstant()->IsTrue()) {
1767       if (true_target != nullptr) {
1768         __ jmp(true_target);
1769       }
1770     } else {
1771       DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
1772       if (false_target != nullptr) {
1773         __ jmp(false_target);
1774       }
1775     }
1776     return;
1777   }
1778 
1779   // The following code generates these patterns:
1780   //  (1) true_target == nullptr && false_target != nullptr
1781   //        - opposite condition true => branch to false_target
1782   //  (2) true_target != nullptr && false_target == nullptr
1783   //        - condition true => branch to true_target
1784   //  (3) true_target != nullptr && false_target != nullptr
1785   //        - condition true => branch to true_target
1786   //        - branch to false_target
1787   if (IsBooleanValueOrMaterializedCondition(cond)) {
1788     if (AreEflagsSetFrom(cond, instruction)) {
1789       if (true_target == nullptr) {
1790         __ j(X86_64IntegerCondition(cond->AsCondition()->GetOppositeCondition()), false_target);
1791       } else {
1792         __ j(X86_64IntegerCondition(cond->AsCondition()->GetCondition()), true_target);
1793       }
1794     } else {
1795       // Materialized condition, compare against 0.
1796       Location lhs = instruction->GetLocations()->InAt(condition_input_index);
1797       if (lhs.IsRegister()) {
1798         __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
1799       } else {
1800         __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0));
1801       }
1802       if (true_target == nullptr) {
1803         __ j(kEqual, false_target);
1804       } else {
1805         __ j(kNotEqual, true_target);
1806       }
1807     }
1808   } else {
1809     // Condition has not been materialized, use its inputs as the
1810     // comparison and its condition as the branch condition.
1811     HCondition* condition = cond->AsCondition();
1812 
1813     // If this is a long or FP comparison that has been folded into
1814     // the HCondition, generate the comparison directly.
1815     DataType::Type type = condition->InputAt(0)->GetType();
1816     if (type == DataType::Type::kInt64 || DataType::IsFloatingPointType(type)) {
1817       GenerateCompareTestAndBranch(condition, true_target, false_target);
1818       return;
1819     }
1820 
1821     Location lhs = condition->GetLocations()->InAt(0);
1822     Location rhs = condition->GetLocations()->InAt(1);
1823     codegen_->GenerateIntCompare(lhs, rhs);
1824     if (true_target == nullptr) {
1825       __ j(X86_64IntegerCondition(condition->GetOppositeCondition()), false_target);
1826     } else {
1827       __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
1828     }
1829   }
1830 
1831   // If neither branch falls through (case 3), the conditional branch to `true_target`
1832   // was already emitted (case 2) and we need to emit a jump to `false_target`.
1833   if (true_target != nullptr && false_target != nullptr) {
1834     __ jmp(false_target);
1835   }
1836 }
1837 
1838 void LocationsBuilderX86_64::VisitIf(HIf* if_instr) {
1839   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
1840   if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
1841     locations->SetInAt(0, Location::Any());
1842   }
1843 }
1844 
1845 void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
1846   HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
1847   HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
1848   Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
1849       nullptr : codegen_->GetLabelOf(true_successor);
1850   Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
1851       nullptr : codegen_->GetLabelOf(false_successor);
1852   GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
1853 }
1854 
1855 void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
1856   LocationSummary* locations = new (GetGraph()->GetAllocator())
1857       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
1858   InvokeRuntimeCallingConvention calling_convention;
1859   RegisterSet caller_saves = RegisterSet::Empty();
1860   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1861   locations->SetCustomSlowPathCallerSaves(caller_saves);
1862   if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
1863     locations->SetInAt(0, Location::Any());
1864   }
1865 }
1866 
1867 void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
1868   SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86_64>(deoptimize);
1869   GenerateTestAndBranch<Label>(deoptimize,
1870                                /* condition_input_index= */ 0,
1871                                slow_path->GetEntryLabel(),
1872                                /* false_target= */ nullptr);
1873 }
1874 
1875 void LocationsBuilderX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
1876   LocationSummary* locations = new (GetGraph()->GetAllocator())
1877       LocationSummary(flag, LocationSummary::kNoCall);
1878   locations->SetOut(Location::RequiresRegister());
1879 }
1880 
1881 void InstructionCodeGeneratorX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
1882   __ movl(flag->GetLocations()->Out().AsRegister<CpuRegister>(),
1883           Address(CpuRegister(RSP), codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
1884 }
1885 
1886 static bool SelectCanUseCMOV(HSelect* select) {
1887   // There are no conditional move instructions for XMMs.
1888   if (DataType::IsFloatingPointType(select->GetType())) {
1889     return false;
1890   }
1891 
1892   // A FP condition doesn't generate the single CC that we need.
1893   HInstruction* condition = select->GetCondition();
1894   if (condition->IsCondition() &&
1895       DataType::IsFloatingPointType(condition->InputAt(0)->GetType())) {
1896     return false;
1897   }
1898 
1899   // We can generate a CMOV for this Select.
1900   return true;
1901 }
1902 
1903 void LocationsBuilderX86_64::VisitSelect(HSelect* select) {
1904   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select);
1905   if (DataType::IsFloatingPointType(select->GetType())) {
1906     locations->SetInAt(0, Location::RequiresFpuRegister());
1907     locations->SetInAt(1, Location::Any());
1908   } else {
1909     locations->SetInAt(0, Location::RequiresRegister());
1910     if (SelectCanUseCMOV(select)) {
1911       if (select->InputAt(1)->IsConstant()) {
1912         locations->SetInAt(1, Location::RequiresRegister());
1913       } else {
1914         locations->SetInAt(1, Location::Any());
1915       }
1916     } else {
1917       locations->SetInAt(1, Location::Any());
1918     }
1919   }
1920   if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
1921     locations->SetInAt(2, Location::RequiresRegister());
1922   }
1923   locations->SetOut(Location::SameAsFirstInput());
1924 }
1925 
1926 void InstructionCodeGeneratorX86_64::VisitSelect(HSelect* select) {
1927   LocationSummary* locations = select->GetLocations();
1928   if (SelectCanUseCMOV(select)) {
1929     // If both the condition and the source types are integer, we can generate
1930     // a CMOV to implement Select.
1931     CpuRegister value_false = locations->InAt(0).AsRegister<CpuRegister>();
1932     Location value_true_loc = locations->InAt(1);
1933     DCHECK(locations->InAt(0).Equals(locations->Out()));
1934 
1935     HInstruction* select_condition = select->GetCondition();
1936     Condition cond = kNotEqual;
1937 
1938     // Figure out how to test the 'condition'.
1939     if (select_condition->IsCondition()) {
1940       HCondition* condition = select_condition->AsCondition();
1941       if (!condition->IsEmittedAtUseSite()) {
1942         // This was a previously materialized condition.
1943         // Can we use the existing condition code?
1944         if (AreEflagsSetFrom(condition, select)) {
1945           // Materialization was the previous instruction.  Condition codes are right.
1946           cond = X86_64IntegerCondition(condition->GetCondition());
1947         } else {
1948           // No, we have to recreate the condition code.
1949           CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
1950           __ testl(cond_reg, cond_reg);
1951         }
1952       } else {
1953         GenerateCompareTest(condition);
1954         cond = X86_64IntegerCondition(condition->GetCondition());
1955       }
1956     } else {
1957       // Must be a Boolean condition, which needs to be compared to 0.
1958       CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
1959       __ testl(cond_reg, cond_reg);
1960     }
1961 
1962     // If the condition is true, overwrite the output, which already contains false.
1963     // Generate the correct sized CMOV.
1964     bool is_64_bit = DataType::Is64BitType(select->GetType());
1965     if (value_true_loc.IsRegister()) {
1966       __ cmov(cond, value_false, value_true_loc.AsRegister<CpuRegister>(), is_64_bit);
1967     } else {
1968       __ cmov(cond,
1969               value_false,
1970               Address(CpuRegister(RSP), value_true_loc.GetStackIndex()), is_64_bit);
1971     }
1972   } else {
1973     NearLabel false_target;
1974     GenerateTestAndBranch<NearLabel>(select,
1975                                      /* condition_input_index= */ 2,
1976                                      /* true_target= */ nullptr,
1977                                      &false_target);
1978     codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
1979     __ Bind(&false_target);
1980   }
1981 }
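
// Illustrative CMOV lowering for an integer select on a materialized (boolean) condition; the
// output register already holds the false value because of the SameAsFirstInput constraint:
//
//   testl cond, cond
//   cmovneq out, true_value     ; 64-bit form; the 32-bit form is used for narrower types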
1982 
1983 void LocationsBuilderX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
1984   new (GetGraph()->GetAllocator()) LocationSummary(info);
1985 }
1986 
1987 void InstructionCodeGeneratorX86_64::VisitNativeDebugInfo(HNativeDebugInfo*) {
1988   // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
1989 }
1990 
1991 void CodeGeneratorX86_64::GenerateNop() {
1992   __ nop();
1993 }
1994 
1995 void LocationsBuilderX86_64::HandleCondition(HCondition* cond) {
1996   LocationSummary* locations =
1997       new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall);
1998   // Handle the long/FP comparisons made in instruction simplification.
1999   switch (cond->InputAt(0)->GetType()) {
2000     case DataType::Type::kInt64:
2001       locations->SetInAt(0, Location::RequiresRegister());
2002       locations->SetInAt(1, Location::Any());
2003       break;
2004     case DataType::Type::kFloat32:
2005     case DataType::Type::kFloat64:
2006       locations->SetInAt(0, Location::RequiresFpuRegister());
2007       locations->SetInAt(1, Location::Any());
2008       break;
2009     default:
2010       locations->SetInAt(0, Location::RequiresRegister());
2011       locations->SetInAt(1, Location::Any());
2012       break;
2013   }
2014   if (!cond->IsEmittedAtUseSite()) {
2015     locations->SetOut(Location::RequiresRegister());
2016   }
2017 }
2018 
2019 void InstructionCodeGeneratorX86_64::HandleCondition(HCondition* cond) {
2020   if (cond->IsEmittedAtUseSite()) {
2021     return;
2022   }
2023 
2024   LocationSummary* locations = cond->GetLocations();
2025   Location lhs = locations->InAt(0);
2026   Location rhs = locations->InAt(1);
2027   CpuRegister reg = locations->Out().AsRegister<CpuRegister>();
2028   NearLabel true_label, false_label;
2029 
2030   switch (cond->InputAt(0)->GetType()) {
2031     default:
2032       // Integer case.
2033 
2034       // Clear output register: setcc only sets the low byte.
2035       __ xorl(reg, reg);
2036 
2037       codegen_->GenerateIntCompare(lhs, rhs);
2038       __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
2039       return;
2040     case DataType::Type::kInt64:
2041       // Clear output register: setcc only sets the low byte.
2042       __ xorl(reg, reg);
2043 
2044       codegen_->GenerateLongCompare(lhs, rhs);
2045       __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
2046       return;
2047     case DataType::Type::kFloat32: {
2048       XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
2049       if (rhs.IsConstant()) {
2050         float value = rhs.GetConstant()->AsFloatConstant()->GetValue();
2051         __ ucomiss(lhs_reg, codegen_->LiteralFloatAddress(value));
2052       } else if (rhs.IsStackSlot()) {
2053         __ ucomiss(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
2054       } else {
2055         __ ucomiss(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
2056       }
2057       GenerateFPJumps(cond, &true_label, &false_label);
2058       break;
2059     }
2060     case DataType::Type::kFloat64: {
2061       XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
2062       if (rhs.IsConstant()) {
2063         double value = rhs.GetConstant()->AsDoubleConstant()->GetValue();
2064         __ ucomisd(lhs_reg, codegen_->LiteralDoubleAddress(value));
2065       } else if (rhs.IsDoubleStackSlot()) {
2066         __ ucomisd(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
2067       } else {
2068         __ ucomisd(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
2069       }
2070       GenerateFPJumps(cond, &true_label, &false_label);
2071       break;
2072     }
2073   }
2074 
2075   // Convert the jumps into the result.
2076   NearLabel done_label;
2077 
2078   // False case: result = 0.
2079   __ Bind(&false_label);
2080   __ xorl(reg, reg);
2081   __ jmp(&done_label);
2082 
2083   // True case: result = 1.
2084   __ Bind(&true_label);
2085   __ movl(reg, Immediate(1));
2086   __ Bind(&done_label);
2087 }
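
// Illustrative materializations produced above:
//
//   int32 "a < b":   xorl out, out        ; clear first, setcc writes only the low byte
//                    cmpl a, b
//                    setl out
//
//   float "a < b":   ucomiss a, b
//                    ; branch to true_label/false_label (NaN routed by IsFPConditionTrueIfNaN /
//                    ; IsFPConditionFalseIfNaN), then out = 1 on the true path, 0 on the false path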
2088 
2089 void LocationsBuilderX86_64::VisitEqual(HEqual* comp) {
2090   HandleCondition(comp);
2091 }
2092 
2093 void InstructionCodeGeneratorX86_64::VisitEqual(HEqual* comp) {
2094   HandleCondition(comp);
2095 }
2096 
2097 void LocationsBuilderX86_64::VisitNotEqual(HNotEqual* comp) {
2098   HandleCondition(comp);
2099 }
2100 
2101 void InstructionCodeGeneratorX86_64::VisitNotEqual(HNotEqual* comp) {
2102   HandleCondition(comp);
2103 }
2104 
2105 void LocationsBuilderX86_64::VisitLessThan(HLessThan* comp) {
2106   HandleCondition(comp);
2107 }
2108 
2109 void InstructionCodeGeneratorX86_64::VisitLessThan(HLessThan* comp) {
2110   HandleCondition(comp);
2111 }
2112 
2113 void LocationsBuilderX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2114   HandleCondition(comp);
2115 }
2116 
2117 void InstructionCodeGeneratorX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2118   HandleCondition(comp);
2119 }
2120 
2121 void LocationsBuilderX86_64::VisitGreaterThan(HGreaterThan* comp) {
2122   HandleCondition(comp);
2123 }
2124 
2125 void InstructionCodeGeneratorX86_64::VisitGreaterThan(HGreaterThan* comp) {
2126   HandleCondition(comp);
2127 }
2128 
2129 void LocationsBuilderX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2130   HandleCondition(comp);
2131 }
2132 
2133 void InstructionCodeGeneratorX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2134   HandleCondition(comp);
2135 }
2136 
2137 void LocationsBuilderX86_64::VisitBelow(HBelow* comp) {
2138   HandleCondition(comp);
2139 }
2140 
2141 void InstructionCodeGeneratorX86_64::VisitBelow(HBelow* comp) {
2142   HandleCondition(comp);
2143 }
2144 
2145 void LocationsBuilderX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
2146   HandleCondition(comp);
2147 }
2148 
2149 void InstructionCodeGeneratorX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
2150   HandleCondition(comp);
2151 }
2152 
2153 void LocationsBuilderX86_64::VisitAbove(HAbove* comp) {
2154   HandleCondition(comp);
2155 }
2156 
2157 void InstructionCodeGeneratorX86_64::VisitAbove(HAbove* comp) {
2158   HandleCondition(comp);
2159 }
2160 
2161 void LocationsBuilderX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
2162   HandleCondition(comp);
2163 }
2164 
2165 void InstructionCodeGeneratorX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
2166   HandleCondition(comp);
2167 }
2168 
2169 void LocationsBuilderX86_64::VisitCompare(HCompare* compare) {
2170   LocationSummary* locations =
2171       new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
2172   switch (compare->InputAt(0)->GetType()) {
2173     case DataType::Type::kBool:
2174     case DataType::Type::kUint8:
2175     case DataType::Type::kInt8:
2176     case DataType::Type::kUint16:
2177     case DataType::Type::kInt16:
2178     case DataType::Type::kInt32:
2179     case DataType::Type::kInt64: {
2180       locations->SetInAt(0, Location::RequiresRegister());
2181       locations->SetInAt(1, Location::Any());
2182       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2183       break;
2184     }
2185     case DataType::Type::kFloat32:
2186     case DataType::Type::kFloat64: {
2187       locations->SetInAt(0, Location::RequiresFpuRegister());
2188       locations->SetInAt(1, Location::Any());
2189       locations->SetOut(Location::RequiresRegister());
2190       break;
2191     }
2192     default:
2193       LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
2194   }
2195 }
2196 
2197 void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) {
2198   LocationSummary* locations = compare->GetLocations();
2199   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
2200   Location left = locations->InAt(0);
2201   Location right = locations->InAt(1);
2202 
2203   NearLabel less, greater, done;
2204   DataType::Type type = compare->InputAt(0)->GetType();
2205   Condition less_cond = kLess;
2206 
2207   switch (type) {
2208     case DataType::Type::kBool:
2209     case DataType::Type::kUint8:
2210     case DataType::Type::kInt8:
2211     case DataType::Type::kUint16:
2212     case DataType::Type::kInt16:
2213     case DataType::Type::kInt32: {
2214       codegen_->GenerateIntCompare(left, right);
2215       break;
2216     }
2217     case DataType::Type::kInt64: {
2218       codegen_->GenerateLongCompare(left, right);
2219       break;
2220     }
2221     case DataType::Type::kFloat32: {
2222       XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
2223       if (right.IsConstant()) {
2224         float value = right.GetConstant()->AsFloatConstant()->GetValue();
2225         __ ucomiss(left_reg, codegen_->LiteralFloatAddress(value));
2226       } else if (right.IsStackSlot()) {
2227         __ ucomiss(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
2228       } else {
2229         __ ucomiss(left_reg, right.AsFpuRegister<XmmRegister>());
2230       }
2231       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
2232       less_cond = kBelow;  //  ucomis{s,d} sets CF
2233       break;
2234     }
2235     case DataType::Type::kFloat64: {
2236       XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
2237       if (right.IsConstant()) {
2238         double value = right.GetConstant()->AsDoubleConstant()->GetValue();
2239         __ ucomisd(left_reg, codegen_->LiteralDoubleAddress(value));
2240       } else if (right.IsDoubleStackSlot()) {
2241         __ ucomisd(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
2242       } else {
2243         __ ucomisd(left_reg, right.AsFpuRegister<XmmRegister>());
2244       }
2245       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
2246       less_cond = kBelow;  //  ucomis{s,d} sets CF
2247       break;
2248     }
2249     default:
2250       LOG(FATAL) << "Unexpected compare type " << type;
2251   }
2252 
2253   __ movl(out, Immediate(0));
2254   __ j(kEqual, &done);
2255   __ j(less_cond, &less);
2256 
2257   __ Bind(&greater);
2258   __ movl(out, Immediate(1));
2259   __ jmp(&done);
2260 
2261   __ Bind(&less);
2262   __ movl(out, Immediate(-1));
2263 
2264   __ Bind(&done);
2265 }
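
// Illustrative lowering of a float HCompare with gt bias, leaving -1, 0 or 1 in `out`:
//
//   ucomiss left, right
//   jp   greater          ; unordered (NaN): gt bias picks the "greater" result
//   movl out, 0
//   je   done
//   jb   less             ; ucomiss sets CF when left < right
//  greater:
//   movl out, 1
//   jmp  done
//  less:
//   movl out, -1
//  done: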
2266 
2267 void LocationsBuilderX86_64::VisitIntConstant(HIntConstant* constant) {
2268   LocationSummary* locations =
2269       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2270   locations->SetOut(Location::ConstantLocation(constant));
2271 }
2272 
2273 void InstructionCodeGeneratorX86_64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
2274   // Will be generated at use site.
2275 }
2276 
2277 void LocationsBuilderX86_64::VisitNullConstant(HNullConstant* constant) {
2278   LocationSummary* locations =
2279       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2280   locations->SetOut(Location::ConstantLocation(constant));
2281 }
2282 
2283 void InstructionCodeGeneratorX86_64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
2284   // Will be generated at use site.
2285 }
2286 
2287 void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) {
2288   LocationSummary* locations =
2289       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2290   locations->SetOut(Location::ConstantLocation(constant));
2291 }
2292 
2293 void InstructionCodeGeneratorX86_64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
2294   // Will be generated at use site.
2295 }
2296 
2297 void LocationsBuilderX86_64::VisitFloatConstant(HFloatConstant* constant) {
2298   LocationSummary* locations =
2299       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2300   locations->SetOut(Location::ConstantLocation(constant));
2301 }
2302 
2303 void InstructionCodeGeneratorX86_64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
2304   // Will be generated at use site.
2305 }
2306 
2307 void LocationsBuilderX86_64::VisitDoubleConstant(HDoubleConstant* constant) {
2308   LocationSummary* locations =
2309       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2310   locations->SetOut(Location::ConstantLocation(constant));
2311 }
2312 
2313 void InstructionCodeGeneratorX86_64::VisitDoubleConstant(
2314     HDoubleConstant* constant ATTRIBUTE_UNUSED) {
2315   // Will be generated at use site.
2316 }
2317 
2318 void LocationsBuilderX86_64::VisitConstructorFence(HConstructorFence* constructor_fence) {
2319   constructor_fence->SetLocations(nullptr);
2320 }
2321 
2322 void InstructionCodeGeneratorX86_64::VisitConstructorFence(
2323     HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
2324   codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2325 }
2326 
2327 void LocationsBuilderX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2328   memory_barrier->SetLocations(nullptr);
2329 }
2330 
2331 void InstructionCodeGeneratorX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2332   codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
2333 }
2334 
2335 void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) {
2336   ret->SetLocations(nullptr);
2337 }
2338 
2339 void InstructionCodeGeneratorX86_64::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) {
2340   codegen_->GenerateFrameExit();
2341 }
2342 
2343 void LocationsBuilderX86_64::VisitReturn(HReturn* ret) {
2344   LocationSummary* locations =
2345       new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall);
2346   switch (ret->InputAt(0)->GetType()) {
2347     case DataType::Type::kReference:
2348     case DataType::Type::kBool:
2349     case DataType::Type::kUint8:
2350     case DataType::Type::kInt8:
2351     case DataType::Type::kUint16:
2352     case DataType::Type::kInt16:
2353     case DataType::Type::kInt32:
2354     case DataType::Type::kInt64:
2355       locations->SetInAt(0, Location::RegisterLocation(RAX));
2356       break;
2357 
2358     case DataType::Type::kFloat32:
2359     case DataType::Type::kFloat64:
2360       locations->SetInAt(0, Location::FpuRegisterLocation(XMM0));
2361       break;
2362 
2363     default:
2364       LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
2365   }
2366 }
2367 
2368 void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) {
2369   switch (ret->InputAt(0)->GetType()) {
2370     case DataType::Type::kReference:
2371     case DataType::Type::kBool:
2372     case DataType::Type::kUint8:
2373     case DataType::Type::kInt8:
2374     case DataType::Type::kUint16:
2375     case DataType::Type::kInt16:
2376     case DataType::Type::kInt32:
2377     case DataType::Type::kInt64:
2378       DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<CpuRegister>().AsRegister(), RAX);
2379       break;
2380 
2381     case DataType::Type::kFloat32: {
2382       DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(),
2383                 XMM0);
2384       // To simplify callers of an OSR method, we put the return value in both
2385       // a floating-point and a core register.
2386       if (GetGraph()->IsCompilingOsr()) {
2387         __ movd(CpuRegister(RAX), XmmRegister(XMM0), /* is64bit= */ false);
2388       }
2389       break;
2390     }
2391     case DataType::Type::kFloat64: {
2392       DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(),
2393                 XMM0);
2394       // To simplify callers of an OSR method, we put the return value in both
2395       // a floating-point and a core register.
2396       if (GetGraph()->IsCompilingOsr()) {
2397         __ movd(CpuRegister(RAX), XmmRegister(XMM0), /* is64bit= */ true);
2398       }
2399       break;
2400     }
2401 
2402     default:
2403       LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
2404   }
2405   codegen_->GenerateFrameExit();
2406 }
2407 
2408 Location InvokeDexCallingConventionVisitorX86_64::GetReturnLocation(DataType::Type type) const {
2409   switch (type) {
2410     case DataType::Type::kReference:
2411     case DataType::Type::kBool:
2412     case DataType::Type::kUint8:
2413     case DataType::Type::kInt8:
2414     case DataType::Type::kUint16:
2415     case DataType::Type::kInt16:
2416     case DataType::Type::kUint32:
2417     case DataType::Type::kInt32:
2418     case DataType::Type::kUint64:
2419     case DataType::Type::kInt64:
2420       return Location::RegisterLocation(RAX);
2421 
2422     case DataType::Type::kVoid:
2423       return Location::NoLocation();
2424 
2425     case DataType::Type::kFloat64:
2426     case DataType::Type::kFloat32:
2427       return Location::FpuRegisterLocation(XMM0);
2428   }
2429 
2430   UNREACHABLE();
2431 }
2432 
2433 Location InvokeDexCallingConventionVisitorX86_64::GetMethodLocation() const {
2434   return Location::RegisterLocation(kMethodRegisterArgument);
2435 }
2436 
2437 Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(DataType::Type type) {
2438   switch (type) {
2439     case DataType::Type::kReference:
2440     case DataType::Type::kBool:
2441     case DataType::Type::kUint8:
2442     case DataType::Type::kInt8:
2443     case DataType::Type::kUint16:
2444     case DataType::Type::kInt16:
2445     case DataType::Type::kInt32: {
2446       uint32_t index = gp_index_++;
2447       stack_index_++;
2448       if (index < calling_convention.GetNumberOfRegisters()) {
2449         return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
2450       } else {
2451         return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
2452       }
2453     }
2454 
2455     case DataType::Type::kInt64: {
2456       uint32_t index = gp_index_;
2457       stack_index_ += 2;
2458       if (index < calling_convention.GetNumberOfRegisters()) {
2459         gp_index_ += 1;
2460         return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
2461       } else {
2462         gp_index_ += 2;
2463         return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
2464       }
2465     }
2466 
2467     case DataType::Type::kFloat32: {
2468       uint32_t index = float_index_++;
2469       stack_index_++;
2470       if (index < calling_convention.GetNumberOfFpuRegisters()) {
2471         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
2472       } else {
2473         return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
2474       }
2475     }
2476 
2477     case DataType::Type::kFloat64: {
2478       uint32_t index = float_index_++;
2479       stack_index_ += 2;
2480       if (index < calling_convention.GetNumberOfFpuRegisters()) {
2481         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
2482       } else {
2483         return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
2484       }
2485     }
2486 
2487     case DataType::Type::kUint32:
2488     case DataType::Type::kUint64:
2489     case DataType::Type::kVoid:
2490       LOG(FATAL) << "Unexpected parameter type " << type;
2491       UNREACHABLE();
2492   }
2493   return Location::NoLocation();
2494 }
2495 
2496 void LocationsBuilderX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2497   // The trampoline uses the same calling convention as dex calling conventions,
2498   // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
2499   // the method_idx.
2500   HandleInvoke(invoke);
2501 }
2502 
2503 void InstructionCodeGeneratorX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2504   codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
2505 }
2506 
2507 void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2508   // Explicit clinit checks triggered by static invokes must have been pruned by
2509   // art::PrepareForRegisterAllocation.
2510   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2511 
2512   IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
2513   if (intrinsic.TryDispatch(invoke)) {
2514     return;
2515   }
2516 
2517   HandleInvoke(invoke);
2518 }
2519 
2520 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
2521   if (invoke->GetLocations()->Intrinsified()) {
2522     IntrinsicCodeGeneratorX86_64 intrinsic(codegen);
2523     intrinsic.Dispatch(invoke);
2524     return true;
2525   }
2526   return false;
2527 }
2528 
2529 void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2530   // Explicit clinit checks triggered by static invokes must have been pruned by
2531   // art::PrepareForRegisterAllocation.
2532   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2533 
2534   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2535     return;
2536   }
2537 
2538   LocationSummary* locations = invoke->GetLocations();
2539   codegen_->GenerateStaticOrDirectCall(
2540       invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
2541 }
2542 
2543 void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) {
2544   InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
2545   CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
2546 }
2547 
2548 void LocationsBuilderX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2549   IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
2550   if (intrinsic.TryDispatch(invoke)) {
2551     return;
2552   }
2553 
2554   HandleInvoke(invoke);
2555 }
2556 
2557 void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2558   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2559     return;
2560   }
2561 
2562   codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
2563   DCHECK(!codegen_->IsLeafMethod());
2564 }
2565 
2566 void LocationsBuilderX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
2567   HandleInvoke(invoke);
2568   // Add the hidden argument.
2569   invoke->GetLocations()->AddTemp(Location::RegisterLocation(RAX));
2570 }
2571 
2572 void CodeGeneratorX86_64::MaybeGenerateInlineCacheCheck(HInstruction* instruction,
2573                                                         CpuRegister klass) {
2574   DCHECK_EQ(RDI, klass.AsRegister());
2575   // We know the destination of an intrinsic, so no need to record inline
2576   // caches.
2577   if (!instruction->GetLocations()->Intrinsified() &&
2578       GetGraph()->IsCompilingBaseline() &&
2579       !Runtime::Current()->IsAotCompiler()) {
2580     ScopedObjectAccess soa(Thread::Current());
2581     ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize);
2582     if (info != nullptr) {
2583       InlineCache* cache = info->GetInlineCache(instruction->GetDexPc());
2584       uint64_t address = reinterpret_cast64<uint64_t>(cache);
2585       NearLabel done;
2586       __ movq(CpuRegister(TMP), Immediate(address));
2587       // Fast path for a monomorphic cache.
2588       __ cmpl(Address(CpuRegister(TMP), InlineCache::ClassesOffset().Int32Value()), klass);
2589       __ j(kEqual, &done);
2590       GenerateInvokeRuntime(
2591           GetThreadOffset<kX86_64PointerSize>(kQuickUpdateInlineCache).Int32Value());
2592       __ Bind(&done);
2593     }
2594   }
2595 }
2596 
2597 void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
2598   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
2599   LocationSummary* locations = invoke->GetLocations();
2600   CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
2601   CpuRegister hidden_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
2602   Location receiver = locations->InAt(0);
2603   size_t class_offset = mirror::Object::ClassOffset().SizeValue();
2604 
2605   if (receiver.IsStackSlot()) {
2606     __ movl(temp, Address(CpuRegister(RSP), receiver.GetStackIndex()));
2607     // /* HeapReference<Class> */ temp = temp->klass_
2608     __ movl(temp, Address(temp, class_offset));
2609   } else {
2610     // /* HeapReference<Class> */ temp = receiver->klass_
2611     __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset));
2612   }
2613   codegen_->MaybeRecordImplicitNullCheck(invoke);
2614   // Instead of simply (possibly) unpoisoning `temp` here, we should
2615   // emit a read barrier for the previous class reference load.
2616   // However, this is not required in practice, as this is an
2617   // intermediate/temporary reference and because the current
2618   // concurrent copying collector keeps the from-space memory
2619   // intact/accessible until the end of the marking phase (future
2620   // collectors may not provide this guarantee).
2621   __ MaybeUnpoisonHeapReference(temp);
2622 
2623   codegen_->MaybeGenerateInlineCacheCheck(invoke, temp);
2624 
2625   // Set the hidden argument. This is safe to do here, as RAX
2626   // won't be modified thereafter, before the `call` instruction.
2627   // We also do it after MaybeGenerateInlineCacheCheck, which may use RAX.
2628   DCHECK_EQ(RAX, hidden_reg.AsRegister());
2629   codegen_->Load64BitValue(hidden_reg, invoke->GetDexMethodIndex());
2630 
2631   // temp = temp->GetAddressOfIMT()
2632   __ movq(temp,
2633       Address(temp, mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
2635   uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
2636       invoke->GetImtIndex(), kX86_64PointerSize));
2637   // temp = temp->GetImtEntryAt(method_offset);
2638   __ movq(temp, Address(temp, method_offset));
2639   // call temp->GetEntryPoint();
2640   __ call(Address(
2641       temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64PointerSize).SizeValue()));
2642 
2643   DCHECK(!codegen_->IsLeafMethod());
2644   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2645 }
2646 
2647 void LocationsBuilderX86_64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2648   HandleInvoke(invoke);
2649 }
2650 
2651 void InstructionCodeGeneratorX86_64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2652   codegen_->GenerateInvokePolymorphicCall(invoke);
2653 }
2654 
2655 void LocationsBuilderX86_64::VisitInvokeCustom(HInvokeCustom* invoke) {
2656   HandleInvoke(invoke);
2657 }
2658 
2659 void InstructionCodeGeneratorX86_64::VisitInvokeCustom(HInvokeCustom* invoke) {
2660   codegen_->GenerateInvokeCustomCall(invoke);
2661 }
2662 
2663 void LocationsBuilderX86_64::VisitNeg(HNeg* neg) {
2664   LocationSummary* locations =
2665       new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
2666   switch (neg->GetResultType()) {
2667     case DataType::Type::kInt32:
2668     case DataType::Type::kInt64:
2669       locations->SetInAt(0, Location::RequiresRegister());
2670       locations->SetOut(Location::SameAsFirstInput());
2671       break;
2672 
2673     case DataType::Type::kFloat32:
2674     case DataType::Type::kFloat64:
2675       locations->SetInAt(0, Location::RequiresFpuRegister());
2676       locations->SetOut(Location::SameAsFirstInput());
2677       locations->AddTemp(Location::RequiresFpuRegister());
2678       break;
2679 
2680     default:
2681       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2682   }
2683 }
2684 
2685 void InstructionCodeGeneratorX86_64::VisitNeg(HNeg* neg) {
2686   LocationSummary* locations = neg->GetLocations();
2687   Location out = locations->Out();
2688   Location in = locations->InAt(0);
2689   switch (neg->GetResultType()) {
2690     case DataType::Type::kInt32:
2691       DCHECK(in.IsRegister());
2692       DCHECK(in.Equals(out));
2693       __ negl(out.AsRegister<CpuRegister>());
2694       break;
2695 
2696     case DataType::Type::kInt64:
2697       DCHECK(in.IsRegister());
2698       DCHECK(in.Equals(out));
2699       __ negq(out.AsRegister<CpuRegister>());
2700       break;
2701 
2702     case DataType::Type::kFloat32: {
2703       DCHECK(in.Equals(out));
2704       XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2705       // Implement float negation with an exclusive or with value
2706       // 0x80000000 (mask for bit 31, representing the sign of a
2707       // single-precision floating-point number).
2708       __ movss(mask, codegen_->LiteralInt32Address(0x80000000));
2709       __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
2710       break;
2711     }
2712 
2713     case DataType::Type::kFloat64: {
2714       DCHECK(in.Equals(out));
2715       XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2716       // Implement double negation with an exclusive or with value
2717       // 0x8000000000000000 (mask for bit 63, representing the sign of
2718       // a double-precision floating-point number).
2719       __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000)));
2720       __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
2721       break;
2722     }
2723 
2724     default:
2725       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2726   }
2727 }
2728 
2729 void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) {
2730   LocationSummary* locations =
2731       new (GetGraph()->GetAllocator()) LocationSummary(conversion, LocationSummary::kNoCall);
2732   DataType::Type result_type = conversion->GetResultType();
2733   DataType::Type input_type = conversion->GetInputType();
2734   DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
2735       << input_type << " -> " << result_type;
2736 
2737   switch (result_type) {
2738     case DataType::Type::kUint8:
2739     case DataType::Type::kInt8:
2740     case DataType::Type::kUint16:
2741     case DataType::Type::kInt16:
2742       DCHECK(DataType::IsIntegralType(input_type)) << input_type;
2743       locations->SetInAt(0, Location::Any());
2744       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2745       break;
2746 
2747     case DataType::Type::kInt32:
2748       switch (input_type) {
2749         case DataType::Type::kInt64:
2750           locations->SetInAt(0, Location::Any());
2751           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2752           break;
2753 
2754         case DataType::Type::kFloat32:
2755           locations->SetInAt(0, Location::RequiresFpuRegister());
2756           locations->SetOut(Location::RequiresRegister());
2757           break;
2758 
2759         case DataType::Type::kFloat64:
2760           locations->SetInAt(0, Location::RequiresFpuRegister());
2761           locations->SetOut(Location::RequiresRegister());
2762           break;
2763 
2764         default:
2765           LOG(FATAL) << "Unexpected type conversion from " << input_type
2766                      << " to " << result_type;
2767       }
2768       break;
2769 
2770     case DataType::Type::kInt64:
2771       switch (input_type) {
2772         case DataType::Type::kBool:
2773         case DataType::Type::kUint8:
2774         case DataType::Type::kInt8:
2775         case DataType::Type::kUint16:
2776         case DataType::Type::kInt16:
2777         case DataType::Type::kInt32:
2778           // TODO: We would benefit from a (to-be-implemented)
2779           // Location::RegisterOrStackSlot requirement for this input.
2780           locations->SetInAt(0, Location::RequiresRegister());
2781           locations->SetOut(Location::RequiresRegister());
2782           break;
2783 
2784         case DataType::Type::kFloat32:
2785           locations->SetInAt(0, Location::RequiresFpuRegister());
2786           locations->SetOut(Location::RequiresRegister());
2787           break;
2788 
2789         case DataType::Type::kFloat64:
2790           locations->SetInAt(0, Location::RequiresFpuRegister());
2791           locations->SetOut(Location::RequiresRegister());
2792           break;
2793 
2794         default:
2795           LOG(FATAL) << "Unexpected type conversion from " << input_type
2796                      << " to " << result_type;
2797       }
2798       break;
2799 
2800     case DataType::Type::kFloat32:
2801       switch (input_type) {
2802         case DataType::Type::kBool:
2803         case DataType::Type::kUint8:
2804         case DataType::Type::kInt8:
2805         case DataType::Type::kUint16:
2806         case DataType::Type::kInt16:
2807         case DataType::Type::kInt32:
2808           locations->SetInAt(0, Location::Any());
2809           locations->SetOut(Location::RequiresFpuRegister());
2810           break;
2811 
2812         case DataType::Type::kInt64:
2813           locations->SetInAt(0, Location::Any());
2814           locations->SetOut(Location::RequiresFpuRegister());
2815           break;
2816 
2817         case DataType::Type::kFloat64:
2818           locations->SetInAt(0, Location::Any());
2819           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2820           break;
2821 
2822         default:
2823           LOG(FATAL) << "Unexpected type conversion from " << input_type
2824                      << " to " << result_type;
2825       }
2826       break;
2827 
2828     case DataType::Type::kFloat64:
2829       switch (input_type) {
2830         case DataType::Type::kBool:
2831         case DataType::Type::kUint8:
2832         case DataType::Type::kInt8:
2833         case DataType::Type::kUint16:
2834         case DataType::Type::kInt16:
2835         case DataType::Type::kInt32:
2836           locations->SetInAt(0, Location::Any());
2837           locations->SetOut(Location::RequiresFpuRegister());
2838           break;
2839 
2840         case DataType::Type::kInt64:
2841           locations->SetInAt(0, Location::Any());
2842           locations->SetOut(Location::RequiresFpuRegister());
2843           break;
2844 
2845         case DataType::Type::kFloat32:
2846           locations->SetInAt(0, Location::Any());
2847           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2848           break;
2849 
2850         default:
2851           LOG(FATAL) << "Unexpected type conversion from " << input_type
2852                      << " to " << result_type;
2853       }
2854       break;
2855 
2856     default:
2857       LOG(FATAL) << "Unexpected type conversion from " << input_type
2858                  << " to " << result_type;
2859   }
2860 }
2861 
2862 void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conversion) {
2863   LocationSummary* locations = conversion->GetLocations();
2864   Location out = locations->Out();
2865   Location in = locations->InAt(0);
2866   DataType::Type result_type = conversion->GetResultType();
2867   DataType::Type input_type = conversion->GetInputType();
2868   DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
2869       << input_type << " -> " << result_type;
2870   switch (result_type) {
2871     case DataType::Type::kUint8:
2872       switch (input_type) {
2873         case DataType::Type::kInt8:
2874         case DataType::Type::kUint16:
2875         case DataType::Type::kInt16:
2876         case DataType::Type::kInt32:
2877         case DataType::Type::kInt64:
2878           if (in.IsRegister()) {
2879             __ movzxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2880           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
2881             __ movzxb(out.AsRegister<CpuRegister>(),
2882                       Address(CpuRegister(RSP), in.GetStackIndex()));
2883           } else {
2884             __ movl(out.AsRegister<CpuRegister>(),
2885                     Immediate(static_cast<uint8_t>(Int64FromConstant(in.GetConstant()))));
2886           }
2887           break;
2888 
2889         default:
2890           LOG(FATAL) << "Unexpected type conversion from " << input_type
2891                      << " to " << result_type;
2892       }
2893       break;
2894 
2895     case DataType::Type::kInt8:
2896       switch (input_type) {
2897         case DataType::Type::kUint8:
2898         case DataType::Type::kUint16:
2899         case DataType::Type::kInt16:
2900         case DataType::Type::kInt32:
2901         case DataType::Type::kInt64:
2902           if (in.IsRegister()) {
2903             __ movsxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2904           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
2905             __ movsxb(out.AsRegister<CpuRegister>(),
2906                       Address(CpuRegister(RSP), in.GetStackIndex()));
2907           } else {
2908             __ movl(out.AsRegister<CpuRegister>(),
2909                     Immediate(static_cast<int8_t>(Int64FromConstant(in.GetConstant()))));
2910           }
2911           break;
2912 
2913         default:
2914           LOG(FATAL) << "Unexpected type conversion from " << input_type
2915                      << " to " << result_type;
2916       }
2917       break;
2918 
2919     case DataType::Type::kUint16:
2920       switch (input_type) {
2921         case DataType::Type::kInt8:
2922         case DataType::Type::kInt16:
2923         case DataType::Type::kInt32:
2924         case DataType::Type::kInt64:
2925           if (in.IsRegister()) {
2926             __ movzxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2927           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
2928             __ movzxw(out.AsRegister<CpuRegister>(),
2929                       Address(CpuRegister(RSP), in.GetStackIndex()));
2930           } else {
2931             __ movl(out.AsRegister<CpuRegister>(),
2932                     Immediate(static_cast<uint16_t>(Int64FromConstant(in.GetConstant()))));
2933           }
2934           break;
2935 
2936         default:
2937           LOG(FATAL) << "Unexpected type conversion from " << input_type
2938                      << " to " << result_type;
2939       }
2940       break;
2941 
2942     case DataType::Type::kInt16:
2943       switch (input_type) {
2944         case DataType::Type::kUint16:
2945         case DataType::Type::kInt32:
2946         case DataType::Type::kInt64:
2947           if (in.IsRegister()) {
2948             __ movsxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2949           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
2950             __ movsxw(out.AsRegister<CpuRegister>(),
2951                       Address(CpuRegister(RSP), in.GetStackIndex()));
2952           } else {
2953             __ movl(out.AsRegister<CpuRegister>(),
2954                     Immediate(static_cast<int16_t>(Int64FromConstant(in.GetConstant()))));
2955           }
2956           break;
2957 
2958         default:
2959           LOG(FATAL) << "Unexpected type conversion from " << input_type
2960                      << " to " << result_type;
2961       }
2962       break;
2963 
2964     case DataType::Type::kInt32:
2965       switch (input_type) {
2966         case DataType::Type::kInt64:
2967           if (in.IsRegister()) {
2968             __ movl(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2969           } else if (in.IsDoubleStackSlot()) {
2970             __ movl(out.AsRegister<CpuRegister>(),
2971                     Address(CpuRegister(RSP), in.GetStackIndex()));
2972           } else {
2973             DCHECK(in.IsConstant());
2974             DCHECK(in.GetConstant()->IsLongConstant());
2975             int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
2976             __ movl(out.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
2977           }
2978           break;
2979 
2980         case DataType::Type::kFloat32: {
2981           XmmRegister input = in.AsFpuRegister<XmmRegister>();
2982           CpuRegister output = out.AsRegister<CpuRegister>();
2983           NearLabel done, nan;
2984 
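          // Java float-to-int semantics: NaN converts to 0 and values >= Integer.MAX_VALUE
          // saturate to Integer.MAX_VALUE; cvttss2si already produces Integer.MIN_VALUE for
          // the negative overflow case, which matches the required saturation.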
2985           __ movl(output, Immediate(kPrimIntMax));
2986           // if input >= (float)INT_MAX goto done
2987           __ comiss(input, codegen_->LiteralFloatAddress(static_cast<float>(kPrimIntMax)));
2988           __ j(kAboveEqual, &done);
2989           // if input == NaN goto nan
2990           __ j(kUnordered, &nan);
2991           // output = float-to-int-truncate(input)
2992           __ cvttss2si(output, input, false);
2993           __ jmp(&done);
2994           __ Bind(&nan);
2995           //  output = 0
2996           __ xorl(output, output);
2997           __ Bind(&done);
2998           break;
2999         }
3000 
3001         case DataType::Type::kFloat64: {
3002           XmmRegister input = in.AsFpuRegister<XmmRegister>();
3003           CpuRegister output = out.AsRegister<CpuRegister>();
3004           NearLabel done, nan;
3005 
3006           __ movl(output, Immediate(kPrimIntMax));
3007           // if input >= (double)INT_MAX goto done
3008           __ comisd(input, codegen_->LiteralDoubleAddress(kPrimIntMax));
3009           __ j(kAboveEqual, &done);
3010           // if input == NaN goto nan
3011           __ j(kUnordered, &nan);
3012           // output = double-to-int-truncate(input)
3013           __ cvttsd2si(output, input);
3014           __ jmp(&done);
3015           __ Bind(&nan);
3016           //  output = 0
3017           __ xorl(output, output);
3018           __ Bind(&done);
3019           break;
3020         }
3021 
3022         default:
3023           LOG(FATAL) << "Unexpected type conversion from " << input_type
3024                      << " to " << result_type;
3025       }
3026       break;
3027 
3028     case DataType::Type::kInt64:
3029       DCHECK(out.IsRegister());
3030       switch (input_type) {
3031         case DataType::Type::kBool:
3032         case DataType::Type::kUint8:
3033         case DataType::Type::kInt8:
3034         case DataType::Type::kUint16:
3035         case DataType::Type::kInt16:
3036         case DataType::Type::kInt32:
3037           DCHECK(in.IsRegister());
3038           __ movsxd(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3039           break;
3040 
3041         case DataType::Type::kFloat32: {
3042           XmmRegister input = in.AsFpuRegister<XmmRegister>();
3043           CpuRegister output = out.AsRegister<CpuRegister>();
3044           NearLabel done, nan;
3045 
3046           codegen_->Load64BitValue(output, kPrimLongMax);
3047           // if input >= (float)LONG_MAX goto done
3048           __ comiss(input, codegen_->LiteralFloatAddress(static_cast<float>(kPrimLongMax)));
3049           __ j(kAboveEqual, &done);
3050           // if input == NaN goto nan
3051           __ j(kUnordered, &nan);
3052           // output = float-to-long-truncate(input)
3053           __ cvttss2si(output, input, true);
3054           __ jmp(&done);
3055           __ Bind(&nan);
3056           //  output = 0
3057           __ xorl(output, output);
3058           __ Bind(&done);
3059           break;
3060         }
3061 
3062         case DataType::Type::kFloat64: {
3063           XmmRegister input = in.AsFpuRegister<XmmRegister>();
3064           CpuRegister output = out.AsRegister<CpuRegister>();
3065           NearLabel done, nan;
3066 
3067           codegen_->Load64BitValue(output, kPrimLongMax);
3068           // if input >= (double)LONG_MAX goto done
3069           __ comisd(input, codegen_->LiteralDoubleAddress(
3070                 static_cast<double>(kPrimLongMax)));
3071           __ j(kAboveEqual, &done);
3072           // if input == NaN goto nan
3073           __ j(kUnordered, &nan);
3074           // output = double-to-long-truncate(input)
3075           __ cvttsd2si(output, input, true);
3076           __ jmp(&done);
3077           __ Bind(&nan);
3078           //  output = 0
3079           __ xorl(output, output);
3080           __ Bind(&done);
3081           break;
3082         }
3083 
3084         default:
3085           LOG(FATAL) << "Unexpected type conversion from " << input_type
3086                      << " to " << result_type;
3087       }
3088       break;
3089 
3090     case DataType::Type::kFloat32:
3091       switch (input_type) {
3092         case DataType::Type::kBool:
3093         case DataType::Type::kUint8:
3094         case DataType::Type::kInt8:
3095         case DataType::Type::kUint16:
3096         case DataType::Type::kInt16:
3097         case DataType::Type::kInt32:
3098           if (in.IsRegister()) {
3099             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
3100           } else if (in.IsConstant()) {
3101             int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
3102             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3103             codegen_->Load32BitValue(dest, static_cast<float>(v));
3104           } else {
3105             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
3106                         Address(CpuRegister(RSP), in.GetStackIndex()), false);
3107           }
3108           break;
3109 
3110         case DataType::Type::kInt64:
3111           if (in.IsRegister()) {
3112             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
3113           } else if (in.IsConstant()) {
3114             int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
3115             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3116             codegen_->Load32BitValue(dest, static_cast<float>(v));
3117           } else {
3118             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
3119                         Address(CpuRegister(RSP), in.GetStackIndex()), true);
3120           }
3121           break;
3122 
3123         case DataType::Type::kFloat64:
3124           if (in.IsFpuRegister()) {
3125             __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3126           } else if (in.IsConstant()) {
3127             double v = in.GetConstant()->AsDoubleConstant()->GetValue();
3128             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3129             codegen_->Load32BitValue(dest, static_cast<float>(v));
3130           } else {
3131             __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(),
3132                         Address(CpuRegister(RSP), in.GetStackIndex()));
3133           }
3134           break;
3135 
3136         default:
3137           LOG(FATAL) << "Unexpected type conversion from " << input_type
3138                      << " to " << result_type;
3139       }
3140       break;
3141 
3142     case DataType::Type::kFloat64:
3143       switch (input_type) {
3144         case DataType::Type::kBool:
3145         case DataType::Type::kUint8:
3146         case DataType::Type::kInt8:
3147         case DataType::Type::kUint16:
3148         case DataType::Type::kInt16:
3149         case DataType::Type::kInt32:
3150           if (in.IsRegister()) {
3151             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
3152           } else if (in.IsConstant()) {
3153             int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
3154             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3155             codegen_->Load64BitValue(dest, static_cast<double>(v));
3156           } else {
3157             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
3158                         Address(CpuRegister(RSP), in.GetStackIndex()), false);
3159           }
3160           break;
3161 
3162         case DataType::Type::kInt64:
3163           if (in.IsRegister()) {
3164             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
3165           } else if (in.IsConstant()) {
3166             int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
3167             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3168             codegen_->Load64BitValue(dest, static_cast<double>(v));
3169           } else {
3170             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
3171                         Address(CpuRegister(RSP), in.GetStackIndex()), true);
3172           }
3173           break;
3174 
3175         case DataType::Type::kFloat32:
3176           if (in.IsFpuRegister()) {
3177             __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3178           } else if (in.IsConstant()) {
3179             float v = in.GetConstant()->AsFloatConstant()->GetValue();
3180             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3181             codegen_->Load64BitValue(dest, static_cast<double>(v));
3182           } else {
3183             __ cvtss2sd(out.AsFpuRegister<XmmRegister>(),
3184                         Address(CpuRegister(RSP), in.GetStackIndex()));
3185           }
3186           break;
3187 
3188         default:
3189           LOG(FATAL) << "Unexpected type conversion from " << input_type
3190                      << " to " << result_type;
3191       }
3192       break;
3193 
3194     default:
3195       LOG(FATAL) << "Unexpected type conversion from " << input_type
3196                  << " to " << result_type;
3197   }
3198 }
3199 
3200 void LocationsBuilderX86_64::VisitAdd(HAdd* add) {
3201   LocationSummary* locations =
3202       new (GetGraph()->GetAllocator()) LocationSummary(add, LocationSummary::kNoCall);
3203   switch (add->GetResultType()) {
3204     case DataType::Type::kInt32: {
3205       locations->SetInAt(0, Location::RequiresRegister());
3206       locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
3207       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3208       break;
3209     }
3210 
3211     case DataType::Type::kInt64: {
3212       locations->SetInAt(0, Location::RequiresRegister());
3213       // We can use a leaq or addq if the constant can fit in an immediate.
3214       locations->SetInAt(1, Location::RegisterOrInt32Constant(add->InputAt(1)));
3215       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3216       break;
3217     }
3218 
3219     case DataType::Type::kFloat64:
3220     case DataType::Type::kFloat32: {
3221       locations->SetInAt(0, Location::RequiresFpuRegister());
3222       locations->SetInAt(1, Location::Any());
3223       locations->SetOut(Location::SameAsFirstInput());
3224       break;
3225     }
3226 
3227     default:
3228       LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3229   }
3230 }
3231 
3232 void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) {
3233   LocationSummary* locations = add->GetLocations();
3234   Location first = locations->InAt(0);
3235   Location second = locations->InAt(1);
3236   Location out = locations->Out();
3237 
3238   switch (add->GetResultType()) {
3239     case DataType::Type::kInt32: {
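      // Use addl when the output aliases one of the inputs; otherwise use leal so the sum
      // can be produced directly in the third register without an extra move.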
3240       if (second.IsRegister()) {
3241         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3242           __ addl(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3243         } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3244           __ addl(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
3245         } else {
3246           __ leal(out.AsRegister<CpuRegister>(), Address(
3247               first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
3248         }
3249       } else if (second.IsConstant()) {
3250         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3251           __ addl(out.AsRegister<CpuRegister>(),
3252                   Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
3253         } else {
3254           __ leal(out.AsRegister<CpuRegister>(), Address(
3255               first.AsRegister<CpuRegister>(), second.GetConstant()->AsIntConstant()->GetValue()));
3256         }
3257       } else {
3258         DCHECK(first.Equals(locations->Out()));
3259         __ addl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
3260       }
3261       break;
3262     }
3263 
3264     case DataType::Type::kInt64: {
3265       if (second.IsRegister()) {
3266         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3267           __ addq(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3268         } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3269           __ addq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
3270         } else {
3271           __ leaq(out.AsRegister<CpuRegister>(), Address(
3272               first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
3273         }
3274       } else {
3275         DCHECK(second.IsConstant());
3276         int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3277         int32_t int32_value = Low32Bits(value);
3278         DCHECK_EQ(int32_value, value);
3279         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3280           __ addq(out.AsRegister<CpuRegister>(), Immediate(int32_value));
3281         } else {
3282           __ leaq(out.AsRegister<CpuRegister>(), Address(
3283               first.AsRegister<CpuRegister>(), int32_value));
3284         }
3285       }
3286       break;
3287     }
3288 
3289     case DataType::Type::kFloat32: {
3290       if (second.IsFpuRegister()) {
3291         __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3292       } else if (second.IsConstant()) {
3293         __ addss(first.AsFpuRegister<XmmRegister>(),
3294                  codegen_->LiteralFloatAddress(
3295                      second.GetConstant()->AsFloatConstant()->GetValue()));
3296       } else {
3297         DCHECK(second.IsStackSlot());
3298         __ addss(first.AsFpuRegister<XmmRegister>(),
3299                  Address(CpuRegister(RSP), second.GetStackIndex()));
3300       }
3301       break;
3302     }
3303 
3304     case DataType::Type::kFloat64: {
3305       if (second.IsFpuRegister()) {
3306         __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3307       } else if (second.IsConstant()) {
3308         __ addsd(first.AsFpuRegister<XmmRegister>(),
3309                  codegen_->LiteralDoubleAddress(
3310                      second.GetConstant()->AsDoubleConstant()->GetValue()));
3311       } else {
3312         DCHECK(second.IsDoubleStackSlot());
3313         __ addsd(first.AsFpuRegister<XmmRegister>(),
3314                  Address(CpuRegister(RSP), second.GetStackIndex()));
3315       }
3316       break;
3317     }
3318 
3319     default:
3320       LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3321   }
3322 }
3323 
3324 void LocationsBuilderX86_64::VisitSub(HSub* sub) {
3325   LocationSummary* locations =
3326       new (GetGraph()->GetAllocator()) LocationSummary(sub, LocationSummary::kNoCall);
3327   switch (sub->GetResultType()) {
3328     case DataType::Type::kInt32: {
3329       locations->SetInAt(0, Location::RequiresRegister());
3330       locations->SetInAt(1, Location::Any());
3331       locations->SetOut(Location::SameAsFirstInput());
3332       break;
3333     }
3334     case DataType::Type::kInt64: {
3335       locations->SetInAt(0, Location::RequiresRegister());
3336       locations->SetInAt(1, Location::RegisterOrInt32Constant(sub->InputAt(1)));
3337       locations->SetOut(Location::SameAsFirstInput());
3338       break;
3339     }
3340     case DataType::Type::kFloat32:
3341     case DataType::Type::kFloat64: {
3342       locations->SetInAt(0, Location::RequiresFpuRegister());
3343       locations->SetInAt(1, Location::Any());
3344       locations->SetOut(Location::SameAsFirstInput());
3345       break;
3346     }
3347     default:
3348       LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3349   }
3350 }
3351 
3352 void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) {
3353   LocationSummary* locations = sub->GetLocations();
3354   Location first = locations->InAt(0);
3355   Location second = locations->InAt(1);
3356   DCHECK(first.Equals(locations->Out()));
3357   switch (sub->GetResultType()) {
3358     case DataType::Type::kInt32: {
3359       if (second.IsRegister()) {
3360         __ subl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3361       } else if (second.IsConstant()) {
3362         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
3363         __ subl(first.AsRegister<CpuRegister>(), imm);
3364       } else {
3365         __ subl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
3366       }
3367       break;
3368     }
3369     case DataType::Type::kInt64: {
3370       if (second.IsConstant()) {
3371         int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3372         DCHECK(IsInt<32>(value));
3373         __ subq(first.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
3374       } else {
3375         __ subq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3376       }
3377       break;
3378     }
3379 
3380     case DataType::Type::kFloat32: {
3381       if (second.IsFpuRegister()) {
3382         __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3383       } else if (second.IsConstant()) {
3384         __ subss(first.AsFpuRegister<XmmRegister>(),
3385                  codegen_->LiteralFloatAddress(
3386                      second.GetConstant()->AsFloatConstant()->GetValue()));
3387       } else {
3388         DCHECK(second.IsStackSlot());
3389         __ subss(first.AsFpuRegister<XmmRegister>(),
3390                  Address(CpuRegister(RSP), second.GetStackIndex()));
3391       }
3392       break;
3393     }
3394 
3395     case DataType::Type::kFloat64: {
3396       if (second.IsFpuRegister()) {
3397         __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3398       } else if (second.IsConstant()) {
3399         __ subsd(first.AsFpuRegister<XmmRegister>(),
3400                  codegen_->LiteralDoubleAddress(
3401                      second.GetConstant()->AsDoubleConstant()->GetValue()));
3402       } else {
3403         DCHECK(second.IsDoubleStackSlot());
3404         __ subsd(first.AsFpuRegister<XmmRegister>(),
3405                  Address(CpuRegister(RSP), second.GetStackIndex()));
3406       }
3407       break;
3408     }
3409 
3410     default:
3411       LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3412   }
3413 }
3414 
3415 void LocationsBuilderX86_64::VisitMul(HMul* mul) {
3416   LocationSummary* locations =
3417       new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
3418   switch (mul->GetResultType()) {
3419     case DataType::Type::kInt32: {
3420       locations->SetInAt(0, Location::RequiresRegister());
3421       locations->SetInAt(1, Location::Any());
3422       if (mul->InputAt(1)->IsIntConstant()) {
3423         // Can use 3 operand multiply.
3424         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3425       } else {
3426         locations->SetOut(Location::SameAsFirstInput());
3427       }
3428       break;
3429     }
3430     case DataType::Type::kInt64: {
3431       locations->SetInAt(0, Location::RequiresRegister());
3432       locations->SetInAt(1, Location::Any());
3433       if (mul->InputAt(1)->IsLongConstant() &&
3434           IsInt<32>(mul->InputAt(1)->AsLongConstant()->GetValue())) {
3435         // Can use 3 operand multiply.
3436         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3437       } else {
3438         locations->SetOut(Location::SameAsFirstInput());
3439       }
3440       break;
3441     }
3442     case DataType::Type::kFloat32:
3443     case DataType::Type::kFloat64: {
3444       locations->SetInAt(0, Location::RequiresFpuRegister());
3445       locations->SetInAt(1, Location::Any());
3446       locations->SetOut(Location::SameAsFirstInput());
3447       break;
3448     }
3449 
3450     default:
3451       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3452   }
3453 }
3454 
3455 void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) {
3456   LocationSummary* locations = mul->GetLocations();
3457   Location first = locations->InAt(0);
3458   Location second = locations->InAt(1);
3459   Location out = locations->Out();
3460   switch (mul->GetResultType()) {
3461     case DataType::Type::kInt32:
3462       // The constant may have ended up in a register, so test explicitly to avoid
3463       // problems where the output may not be the same as the first operand.
3464       if (mul->InputAt(1)->IsIntConstant()) {
3465         Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
3466         __ imull(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), imm);
3467       } else if (second.IsRegister()) {
3468         DCHECK(first.Equals(out));
3469         __ imull(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3470       } else {
3471         DCHECK(first.Equals(out));
3472         DCHECK(second.IsStackSlot());
3473         __ imull(first.AsRegister<CpuRegister>(),
3474                  Address(CpuRegister(RSP), second.GetStackIndex()));
3475       }
3476       break;
3477     case DataType::Type::kInt64: {
3478       // The constant may have ended up in a register, so test explicitly to avoid
3479       // problems where the output may not be the same as the first operand.
3480       if (mul->InputAt(1)->IsLongConstant()) {
3481         int64_t value = mul->InputAt(1)->AsLongConstant()->GetValue();
3482         if (IsInt<32>(value)) {
3483           __ imulq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(),
3484                    Immediate(static_cast<int32_t>(value)));
3485         } else {
3486           // Have to use the constant area.
3487           DCHECK(first.Equals(out));
3488           __ imulq(first.AsRegister<CpuRegister>(), codegen_->LiteralInt64Address(value));
3489         }
3490       } else if (second.IsRegister()) {
3491         DCHECK(first.Equals(out));
3492         __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3493       } else {
3494         DCHECK(second.IsDoubleStackSlot());
3495         DCHECK(first.Equals(out));
3496         __ imulq(first.AsRegister<CpuRegister>(),
3497                  Address(CpuRegister(RSP), second.GetStackIndex()));
3498       }
3499       break;
3500     }
3501 
3502     case DataType::Type::kFloat32: {
3503       DCHECK(first.Equals(out));
3504       if (second.IsFpuRegister()) {
3505         __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3506       } else if (second.IsConstant()) {
3507         __ mulss(first.AsFpuRegister<XmmRegister>(),
3508                  codegen_->LiteralFloatAddress(
3509                      second.GetConstant()->AsFloatConstant()->GetValue()));
3510       } else {
3511         DCHECK(second.IsStackSlot());
3512         __ mulss(first.AsFpuRegister<XmmRegister>(),
3513                  Address(CpuRegister(RSP), second.GetStackIndex()));
3514       }
3515       break;
3516     }
3517 
3518     case DataType::Type::kFloat64: {
3519       DCHECK(first.Equals(out));
3520       if (second.IsFpuRegister()) {
3521         __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3522       } else if (second.IsConstant()) {
3523         __ mulsd(first.AsFpuRegister<XmmRegister>(),
3524                  codegen_->LiteralDoubleAddress(
3525                      second.GetConstant()->AsDoubleConstant()->GetValue()));
3526       } else {
3527         DCHECK(second.IsDoubleStackSlot());
3528         __ mulsd(first.AsFpuRegister<XmmRegister>(),
3529                  Address(CpuRegister(RSP), second.GetStackIndex()));
3530       }
3531       break;
3532     }
3533 
3534     default:
3535       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3536   }
3537 }
3538 
3539 void InstructionCodeGeneratorX86_64::PushOntoFPStack(Location source, uint32_t temp_offset,
3540                                                      uint32_t stack_adjustment, bool is_float) {
3541   if (source.IsStackSlot()) {
3542     DCHECK(is_float);
3543     __ flds(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
3544   } else if (source.IsDoubleStackSlot()) {
3545     DCHECK(!is_float);
3546     __ fldl(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
3547   } else {
3548     // Write the value to the temporary location on the stack and load to FP stack.
3549     if (is_float) {
3550       Location stack_temp = Location::StackSlot(temp_offset);
3551       codegen_->Move(stack_temp, source);
3552       __ flds(Address(CpuRegister(RSP), temp_offset));
3553     } else {
3554       Location stack_temp = Location::DoubleStackSlot(temp_offset);
3555       codegen_->Move(stack_temp, source);
3556       __ fldl(Address(CpuRegister(RSP), temp_offset));
3557     }
3558   }
3559 }
3560 
3561 void InstructionCodeGeneratorX86_64::GenerateRemFP(HRem *rem) {
3562   DataType::Type type = rem->GetResultType();
3563   bool is_float = type == DataType::Type::kFloat32;
3564   size_t elem_size = DataType::Size(type);
3565   LocationSummary* locations = rem->GetLocations();
3566   Location first = locations->InAt(0);
3567   Location second = locations->InAt(1);
3568   Location out = locations->Out();
3569 
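  // SSE has no floating-point remainder instruction, so Java's fp remainder is computed
  // with the x87 fprem instruction, passing the operands through the stack.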
3570   // Create stack space for 2 elements.
3571   // TODO: enhance register allocator to ask for stack temporaries.
3572   __ subq(CpuRegister(RSP), Immediate(2 * elem_size));
3573 
3574   // Load the values to the FP stack in reverse order, using temporaries if needed.
3575   PushOntoFPStack(second, elem_size, 2 * elem_size, is_float);
3576   PushOntoFPStack(first, 0, 2 * elem_size, is_float);
3577 
3578   // Loop doing FPREM until we stabilize.
3579   NearLabel retry;
3580   __ Bind(&retry);
3581   __ fprem();
3582 
3583   // Move FP status to AX.
3584   __ fstsw();
3585 
3586   // And see if the argument reduction is complete. This is signaled by the
3587   // C2 FPU flag bit set to 0.
3588   __ andl(CpuRegister(RAX), Immediate(kC2ConditionMask));
3589   __ j(kNotEqual, &retry);
3590 
3591   // We have settled on the final value. Retrieve it into an XMM register.
3592   // Store FP top of stack to real stack.
3593   if (is_float) {
3594     __ fsts(Address(CpuRegister(RSP), 0));
3595   } else {
3596     __ fstl(Address(CpuRegister(RSP), 0));
3597   }
3598 
3599   // Pop the 2 items from the FP stack.
3600   __ fucompp();
3601 
3602   // Load the value from the stack into an XMM register.
3603   DCHECK(out.IsFpuRegister()) << out;
3604   if (is_float) {
3605     __ movss(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
3606   } else {
3607     __ movsd(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
3608   }
3609 
3610   // And remove the temporary stack space we allocated.
3611   __ addq(CpuRegister(RSP), Immediate(2 * elem_size));
3612 }
3613 
3614 void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
3615   DCHECK(instruction->IsDiv() || instruction->IsRem());
3616 
3617   LocationSummary* locations = instruction->GetLocations();
3618   Location second = locations->InAt(1);
3619   DCHECK(second.IsConstant());
3620 
3621   CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
3622   CpuRegister input_register = locations->InAt(0).AsRegister<CpuRegister>();
3623   int64_t imm = Int64FromConstant(second.GetConstant());
3624 
3625   DCHECK(imm == 1 || imm == -1);
3626 
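  // x / 1 == x, x / -1 == -x and x % (+/-)1 == 0, so no division instruction is needed.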
3627   switch (instruction->GetResultType()) {
3628     case DataType::Type::kInt32: {
3629       if (instruction->IsRem()) {
3630         __ xorl(output_register, output_register);
3631       } else {
3632         __ movl(output_register, input_register);
3633         if (imm == -1) {
3634           __ negl(output_register);
3635         }
3636       }
3637       break;
3638     }
3639 
3640     case DataType::Type::kInt64: {
3641       if (instruction->IsRem()) {
3642         __ xorl(output_register, output_register);
3643       } else {
3644         __ movq(output_register, input_register);
3645         if (imm == -1) {
3646           __ negq(output_register);
3647         }
3648       }
3649       break;
3650     }
3651 
3652     default:
3653       LOG(FATAL) << "Unexpected type for div by (-)1 " << instruction->GetResultType();
3654   }
3655 }
3656 void InstructionCodeGeneratorX86_64::RemByPowerOfTwo(HRem* instruction) {
3657   LocationSummary* locations = instruction->GetLocations();
3658   Location second = locations->InAt(1);
3659   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3660   CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
3661   int64_t imm = Int64FromConstant(second.GetConstant());
3662   DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
3663   uint64_t abs_imm = AbsOrMin(imm);
3664   CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
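  // For x % 2^k, compute x & (2^k - 1); if the dividend is negative and the masked result
  // is non-zero, subtract 2^k so the remainder keeps the sign of the dividend.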
3665   if (instruction->GetResultType() == DataType::Type::kInt32) {
3666     NearLabel done;
3667     __ movl(out, numerator);
3668     __ andl(out, Immediate(abs_imm - 1));
3669     __ j(Condition::kZero, &done);
3670     __ leal(tmp, Address(out, static_cast<int32_t>(~(abs_imm - 1))));
3671     __ testl(numerator, numerator);
3672     __ cmov(Condition::kLess, out, tmp, false);
3673     __ Bind(&done);
3674 
3675   } else {
3676     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
3677     codegen_->Load64BitValue(tmp, abs_imm - 1);
3678     NearLabel done;
3679 
3680     __ movq(out, numerator);
3681     __ andq(out, tmp);
3682     __ j(Condition::kZero, &done);
3683     __ movq(tmp, numerator);
3684     __ sarq(tmp, Immediate(63));
3685     __ shlq(tmp, Immediate(WhichPowerOf2(abs_imm)));
3686     __ orq(out, tmp);
3687     __ Bind(&done);
3688   }
3689 }
3690 void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
3691   LocationSummary* locations = instruction->GetLocations();
3692   Location second = locations->InAt(1);
3693 
3694   CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
3695   CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
3696 
3697   int64_t imm = Int64FromConstant(second.GetConstant());
3698   DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
3699   uint64_t abs_imm = AbsOrMin(imm);
3700 
3701   CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
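  // Truncating division by 2^k is an arithmetic shift after biasing negative numerators by
  // (2^k - 1). Illustrative arithmetic (not emitted code), assuming abs_imm == 4:
  //   numerator ==  7  ->   7       >> 2 ==  1
  //   numerator == -7  -> (-7 + 3)  >> 2 == -1   (a plain -7 >> 2 would give -2)
  // The bias is applied only to negative numerators (via cmov, or via the sign bit when
  // abs_imm == 2), and a final negation handles negative divisors.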
3702 
3703   if (instruction->GetResultType() == DataType::Type::kInt32) {
3704     // When the denominator equals 2, we can simply add the sign bit to the numerator.
3705     // Below we use an addl instruction instead of cmov, which saves one cycle.
3706     if (abs_imm == 2) {
3707       __ leal(tmp, Address(numerator, 0));
3708       __ shrl(tmp, Immediate(31));
3709       __ addl(tmp, numerator);
3710     } else {
3711       __ leal(tmp, Address(numerator, abs_imm - 1));
3712       __ testl(numerator, numerator);
3713       __ cmov(kGreaterEqual, tmp, numerator);
3714     }
3715     int shift = CTZ(imm);
3716     __ sarl(tmp, Immediate(shift));
3717 
3718     if (imm < 0) {
3719       __ negl(tmp);
3720     }
3721 
3722     __ movl(output_register, tmp);
3723   } else {
3724     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
3725     CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
3726     if (abs_imm == 2) {
3727       __ movq(rdx, numerator);
3728       __ shrq(rdx, Immediate(63));
3729       __ addq(rdx, numerator);
3730     } else {
3731       codegen_->Load64BitValue(rdx, abs_imm - 1);
3732       __ addq(rdx, numerator);
3733       __ testq(numerator, numerator);
3734       __ cmov(kGreaterEqual, rdx, numerator);
3735     }
3736     int shift = CTZ(imm);
3737     __ sarq(rdx, Immediate(shift));
3738 
3739     if (imm < 0) {
3740       __ negq(rdx);
3741     }
3742 
3743     __ movq(output_register, rdx);
3744   }
3745 }
3746 
3747 void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
3748   DCHECK(instruction->IsDiv() || instruction->IsRem());
3749 
3750   LocationSummary* locations = instruction->GetLocations();
3751   Location second = locations->InAt(1);
3752 
3753   CpuRegister numerator = instruction->IsDiv() ? locations->GetTemp(1).AsRegister<CpuRegister>()
3754       : locations->GetTemp(0).AsRegister<CpuRegister>();
3755   CpuRegister eax = locations->InAt(0).AsRegister<CpuRegister>();
3756   CpuRegister edx = instruction->IsDiv() ? locations->GetTemp(0).AsRegister<CpuRegister>()
3757       : locations->Out().AsRegister<CpuRegister>();
3758   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3759 
3760   DCHECK_EQ(RAX, eax.AsRegister());
3761   DCHECK_EQ(RDX, edx.AsRegister());
3762   if (instruction->IsDiv()) {
3763     DCHECK_EQ(RAX, out.AsRegister());
3764   } else {
3765     DCHECK_EQ(RDX, out.AsRegister());
3766   }
3767 
3768   int64_t magic;
3769   int shift;
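  // Division by an arbitrary constant is strength-reduced to a multiplication by a precomputed
  // "magic" reciprocal, an optional adjustment, an arithmetic shift and a sign fix-up (the
  // Granlund/Montgomery scheme used by CalculateMagicAndShiftForDivRem). Illustrative values
  // (from Hacker's Delight, not computed here): for a 32-bit divisor of 7, magic == 0x92492493
  // and shift == 2, so n / 7 == ((((n * magic) >> 32) + n) >> 2) plus a sign-bit correction.
  // When a remainder is requested it is recovered below as n - (n / imm) * imm.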
3770 
3771   // TODO: can these branches be written as one?
3772   if (instruction->GetResultType() == DataType::Type::kInt32) {
3773     int imm = second.GetConstant()->AsIntConstant()->GetValue();
3774 
3775     CalculateMagicAndShiftForDivRem(imm, false /* is_long= */, &magic, &shift);
3776 
3777     __ movl(numerator, eax);
3778 
3779     __ movl(eax, Immediate(magic));
3780     __ imull(numerator);
3781 
3782     if (imm > 0 && magic < 0) {
3783       __ addl(edx, numerator);
3784     } else if (imm < 0 && magic > 0) {
3785       __ subl(edx, numerator);
3786     }
3787 
3788     if (shift != 0) {
3789       __ sarl(edx, Immediate(shift));
3790     }
3791 
3792     __ movl(eax, edx);
3793     __ shrl(edx, Immediate(31));
3794     __ addl(edx, eax);
3795 
3796     if (instruction->IsRem()) {
3797       __ movl(eax, numerator);
3798       __ imull(edx, Immediate(imm));
3799       __ subl(eax, edx);
3800       __ movl(edx, eax);
3801     } else {
3802       __ movl(eax, edx);
3803     }
3804   } else {
3805     int64_t imm = second.GetConstant()->AsLongConstant()->GetValue();
3806 
3807     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
3808 
3809     CpuRegister rax = eax;
3810     CpuRegister rdx = edx;
3811 
3812     CalculateMagicAndShiftForDivRem(imm, true /* is_long= */, &magic, &shift);
3813 
3814     // Save the numerator.
3815     __ movq(numerator, rax);
3816 
3817     // RAX = magic
3818     codegen_->Load64BitValue(rax, magic);
3819 
3820     // RDX:RAX = magic * numerator
3821     __ imulq(numerator);
3822 
3823     if (imm > 0 && magic < 0) {
3824       // RDX += numerator
3825       __ addq(rdx, numerator);
3826     } else if (imm < 0 && magic > 0) {
3827       // RDX -= numerator
3828       __ subq(rdx, numerator);
3829     }
3830 
3831     // Shift if needed.
3832     if (shift != 0) {
3833       __ sarq(rdx, Immediate(shift));
3834     }
3835 
3836     // RDX += 1 if RDX < 0
3837     __ movq(rax, rdx);
3838     __ shrq(rdx, Immediate(63));
3839     __ addq(rdx, rax);
3840 
3841     if (instruction->IsRem()) {
3842       __ movq(rax, numerator);
3843 
3844       if (IsInt<32>(imm)) {
3845         __ imulq(rdx, Immediate(static_cast<int32_t>(imm)));
3846       } else {
3847         __ imulq(rdx, codegen_->LiteralInt64Address(imm));
3848       }
3849 
3850       __ subq(rax, rdx);
3851       __ movq(rdx, rax);
3852     } else {
3853       __ movq(rax, rdx);
3854     }
3855   }
3856 }
3857 
3858 void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
3859   DCHECK(instruction->IsDiv() || instruction->IsRem());
3860   DataType::Type type = instruction->GetResultType();
3861   DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
3862 
3863   bool is_div = instruction->IsDiv();
3864   LocationSummary* locations = instruction->GetLocations();
3865 
3866   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3867   Location second = locations->InAt(1);
3868 
3869   DCHECK_EQ(RAX, locations->InAt(0).AsRegister<CpuRegister>().AsRegister());
3870   DCHECK_EQ(is_div ? RAX : RDX, out.AsRegister());
3871 
3872   if (second.IsConstant()) {
3873     int64_t imm = Int64FromConstant(second.GetConstant());
3874 
3875     if (imm == 0) {
3876       // Do not generate anything. DivZeroCheck would prevent any code from being executed.
3877     } else if (imm == 1 || imm == -1) {
3878       DivRemOneOrMinusOne(instruction);
3879     } else if (IsPowerOfTwo(AbsOrMin(imm))) {
3880       if (is_div) {
3881         DivByPowerOfTwo(instruction->AsDiv());
3882       } else {
3883         RemByPowerOfTwo(instruction->AsRem());
3884       }
3885     } else {
3886       DCHECK(imm <= -2 || imm >= 2);
3887       GenerateDivRemWithAnyConstant(instruction);
3888     }
3889   } else {
3890     SlowPathCode* slow_path =
3891         new (codegen_->GetScopedAllocator()) DivRemMinusOneSlowPathX86_64(
3892             instruction, out.AsRegister(), type, is_div);
3893     codegen_->AddSlowPath(slow_path);
3894 
3895     CpuRegister second_reg = second.AsRegister<CpuRegister>();
3896     // 0x80000000(00000000)/-1 triggers an arithmetic exception!
3897     // Dividing by -1 is actually negation and -0x80000000(00000000) = 0x80000000(00000000)
3898     // so it's safe to just use negl instead of more complex comparisons.
3899     if (type == DataType::Type::kInt32) {
3900       __ cmpl(second_reg, Immediate(-1));
3901       __ j(kEqual, slow_path->GetEntryLabel());
3902       // edx:eax <- sign extension of eax
3903       __ cdq();
3904       // eax = quotient, edx = remainder
3905       __ idivl(second_reg);
3906     } else {
3907       __ cmpq(second_reg, Immediate(-1));
3908       __ j(kEqual, slow_path->GetEntryLabel());
3909       // rdx:rax <- sign extension of rax
3910       __ cqo();
3911       // rax = quotient, rdx = remainder
3912       __ idivq(second_reg);
3913     }
3914     __ Bind(slow_path->GetExitLabel());
3915   }
3916 }
3917 
3918 void LocationsBuilderX86_64::VisitDiv(HDiv* div) {
3919   LocationSummary* locations =
3920       new (GetGraph()->GetAllocator()) LocationSummary(div, LocationSummary::kNoCall);
3921   switch (div->GetResultType()) {
3922     case DataType::Type::kInt32:
3923     case DataType::Type::kInt64: {
3924       locations->SetInAt(0, Location::RegisterLocation(RAX));
3925       locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
3926       locations->SetOut(Location::SameAsFirstInput());
3927       // Intel uses edx:eax as the dividend.
3928       locations->AddTemp(Location::RegisterLocation(RDX));
3929       // We need to save the numerator while we tweak rax and rdx. As we are using imul in a way
3930       // which enforces results to be in RAX and RDX, things are simpler if we use RDX also as
3931       // output and request another temp.
3932       if (div->InputAt(1)->IsConstant()) {
3933         locations->AddTemp(Location::RequiresRegister());
3934       }
3935       break;
3936     }
3937 
3938     case DataType::Type::kFloat32:
3939     case DataType::Type::kFloat64: {
3940       locations->SetInAt(0, Location::RequiresFpuRegister());
3941       locations->SetInAt(1, Location::Any());
3942       locations->SetOut(Location::SameAsFirstInput());
3943       break;
3944     }
3945 
3946     default:
3947       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3948   }
3949 }
3950 
3951 void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) {
3952   LocationSummary* locations = div->GetLocations();
3953   Location first = locations->InAt(0);
3954   Location second = locations->InAt(1);
3955   DCHECK(first.Equals(locations->Out()));
3956 
3957   DataType::Type type = div->GetResultType();
3958   switch (type) {
3959     case DataType::Type::kInt32:
3960     case DataType::Type::kInt64: {
3961       GenerateDivRemIntegral(div);
3962       break;
3963     }
3964 
3965     case DataType::Type::kFloat32: {
3966       if (second.IsFpuRegister()) {
3967         __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3968       } else if (second.IsConstant()) {
3969         __ divss(first.AsFpuRegister<XmmRegister>(),
3970                  codegen_->LiteralFloatAddress(
3971                      second.GetConstant()->AsFloatConstant()->GetValue()));
3972       } else {
3973         DCHECK(second.IsStackSlot());
3974         __ divss(first.AsFpuRegister<XmmRegister>(),
3975                  Address(CpuRegister(RSP), second.GetStackIndex()));
3976       }
3977       break;
3978     }
3979 
3980     case DataType::Type::kFloat64: {
3981       if (second.IsFpuRegister()) {
3982         __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3983       } else if (second.IsConstant()) {
3984         __ divsd(first.AsFpuRegister<XmmRegister>(),
3985                  codegen_->LiteralDoubleAddress(
3986                      second.GetConstant()->AsDoubleConstant()->GetValue()));
3987       } else {
3988         DCHECK(second.IsDoubleStackSlot());
3989         __ divsd(first.AsFpuRegister<XmmRegister>(),
3990                  Address(CpuRegister(RSP), second.GetStackIndex()));
3991       }
3992       break;
3993     }
3994 
3995     default:
3996       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3997   }
3998 }
3999 
4000 void LocationsBuilderX86_64::VisitRem(HRem* rem) {
4001   DataType::Type type = rem->GetResultType();
4002   LocationSummary* locations =
4003     new (GetGraph()->GetAllocator()) LocationSummary(rem, LocationSummary::kNoCall);
4004 
4005   switch (type) {
4006     case DataType::Type::kInt32:
4007     case DataType::Type::kInt64: {
4008       locations->SetInAt(0, Location::RegisterLocation(RAX));
4009       locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
4010       // Intel uses rdx:rax as the dividend and puts the remainder in rdx
4011       locations->SetOut(Location::RegisterLocation(RDX));
4012       // We need to save the numerator while we tweak eax and edx. As we are using imul in a way
4013       // which enforces results to be in RAX and RDX, things are simpler if we use EAX also as
4014       // output and request another temp.
4015       if (rem->InputAt(1)->IsConstant()) {
4016         locations->AddTemp(Location::RequiresRegister());
4017       }
4018       break;
4019     }
4020 
4021     case DataType::Type::kFloat32:
4022     case DataType::Type::kFloat64: {
4023       locations->SetInAt(0, Location::Any());
4024       locations->SetInAt(1, Location::Any());
4025       locations->SetOut(Location::RequiresFpuRegister());
4026       locations->AddTemp(Location::RegisterLocation(RAX));
4027       break;
4028     }
4029 
4030     default:
4031       LOG(FATAL) << "Unexpected rem type " << type;
4032   }
4033 }
4034 
4035 void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) {
4036   DataType::Type type = rem->GetResultType();
4037   switch (type) {
4038     case DataType::Type::kInt32:
4039     case DataType::Type::kInt64: {
4040       GenerateDivRemIntegral(rem);
4041       break;
4042     }
4043     case DataType::Type::kFloat32:
4044     case DataType::Type::kFloat64: {
4045       GenerateRemFP(rem);
4046       break;
4047     }
4048     default:
4049       LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
4050   }
4051 }
4052 
4053 static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
4054   LocationSummary* locations = new (allocator) LocationSummary(minmax);
4055   switch (minmax->GetResultType()) {
4056     case DataType::Type::kInt32:
4057     case DataType::Type::kInt64:
4058       locations->SetInAt(0, Location::RequiresRegister());
4059       locations->SetInAt(1, Location::RequiresRegister());
4060       locations->SetOut(Location::SameAsFirstInput());
4061       break;
4062     case DataType::Type::kFloat32:
4063     case DataType::Type::kFloat64:
4064       locations->SetInAt(0, Location::RequiresFpuRegister());
4065       locations->SetInAt(1, Location::RequiresFpuRegister());
4066       // The following is sub-optimal, but it is all we can do for now. It would be fine to
4067       // also accept the second input as the output (we could simply swap the inputs).
4068       locations->SetOut(Location::SameAsFirstInput());
4069       break;
4070     default:
4071       LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
4072   }
4073 }
4074 
4075 void InstructionCodeGeneratorX86_64::GenerateMinMaxInt(LocationSummary* locations,
4076                                                        bool is_min,
4077                                                        DataType::Type type) {
4078   Location op1_loc = locations->InAt(0);
4079   Location op2_loc = locations->InAt(1);
4080 
4081   // Shortcut for same input locations.
4082   if (op1_loc.Equals(op2_loc)) {
4083     // Can return immediately, as op1_loc == out_loc.
4084     // Note: if we ever support separate registers, e.g., output into memory, we need to check for
4085     //       a copy here.
4086     DCHECK(locations->Out().Equals(op1_loc));
4087     return;
4088   }
4089 
4090   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4091   CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();
4092 
4093   //  (out := op1)
4094   //  out <=? op2
4095   //  if out is min jmp done
4096   //  out := op2
4097   // done:
4098 
4099   if (type == DataType::Type::kInt64) {
4100     __ cmpq(out, op2);
4101     __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ true);
4102   } else {
4103     DCHECK_EQ(type, DataType::Type::kInt32);
4104     __ cmpl(out, op2);
4105     __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ false);
4106   }
4107 }
4108 
4109 void InstructionCodeGeneratorX86_64::GenerateMinMaxFP(LocationSummary* locations,
4110                                                       bool is_min,
4111                                                       DataType::Type type) {
4112   Location op1_loc = locations->InAt(0);
4113   Location op2_loc = locations->InAt(1);
4114   Location out_loc = locations->Out();
4115   XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
4116 
4117   // Shortcut for same input locations.
4118   if (op1_loc.Equals(op2_loc)) {
4119     DCHECK(out_loc.Equals(op1_loc));
4120     return;
4121   }
4122 
4123   //  (out := op1)
4124   //  out <=? op2
4125   //  if Nan jmp Nan_label
4126   //  if out is min jmp done
4127   //  if op2 is min jmp op2_label
4128   //  handle -0/+0
4129   //  jmp done
4130   // Nan_label:
4131   //  out := NaN
4132   // op2_label:
4133   //  out := op2
4134   // done:
4135   //
4136   // This removes one jmp, but needs to copy one input (op1) to out.
4137   //
4138   // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?
4139 
4140   XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
4141 
4142   NearLabel nan, done, op2_label;
4143   if (type == DataType::Type::kFloat64) {
4144     __ ucomisd(out, op2);
4145   } else {
4146     DCHECK_EQ(type, DataType::Type::kFloat32);
4147     __ ucomiss(out, op2);
4148   }
4149 
4150   __ j(Condition::kParityEven, &nan);
4151 
4152   __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
4153   __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
4154 
4155   // Handle 0.0/-0.0.
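  // At this point the operands compared equal, which for zeros also covers +0.0 == -0.0. The
  // sign bit is their only difference, so OR-ing the bit patterns yields -0.0 (the min) and
  // AND-ing them yields +0.0 (the max), hence the choice of orps/orpd vs. andps/andpd below.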
4156   if (is_min) {
4157     if (type == DataType::Type::kFloat64) {
4158       __ orpd(out, op2);
4159     } else {
4160       __ orps(out, op2);
4161     }
4162   } else {
4163     if (type == DataType::Type::kFloat64) {
4164       __ andpd(out, op2);
4165     } else {
4166       __ andps(out, op2);
4167     }
4168   }
4169   __ jmp(&done);
4170 
4171   // NaN handling.
4172   __ Bind(&nan);
4173   if (type == DataType::Type::kFloat64) {
4174     __ movsd(out, codegen_->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
4175   } else {
4176     __ movss(out, codegen_->LiteralInt32Address(INT32_C(0x7FC00000)));
4177   }
4178   __ jmp(&done);
4179 
4180   // out := op2;
4181   __ Bind(&op2_label);
4182   if (type == DataType::Type::kFloat64) {
4183     __ movsd(out, op2);
4184   } else {
4185     __ movss(out, op2);
4186   }
4187 
4188   // Done.
4189   __ Bind(&done);
4190 }
4191 
4192 void InstructionCodeGeneratorX86_64::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
4193   DataType::Type type = minmax->GetResultType();
4194   switch (type) {
4195     case DataType::Type::kInt32:
4196     case DataType::Type::kInt64:
4197       GenerateMinMaxInt(minmax->GetLocations(), is_min, type);
4198       break;
4199     case DataType::Type::kFloat32:
4200     case DataType::Type::kFloat64:
4201       GenerateMinMaxFP(minmax->GetLocations(), is_min, type);
4202       break;
4203     default:
4204       LOG(FATAL) << "Unexpected type for HMinMax " << type;
4205   }
4206 }
4207 
4208 void LocationsBuilderX86_64::VisitMin(HMin* min) {
4209   CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
4210 }
4211 
4212 void InstructionCodeGeneratorX86_64::VisitMin(HMin* min) {
4213   GenerateMinMax(min, /*is_min*/ true);
4214 }
4215 
4216 void LocationsBuilderX86_64::VisitMax(HMax* max) {
4217   CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
4218 }
4219 
4220 void InstructionCodeGeneratorX86_64::VisitMax(HMax* max) {
4221   GenerateMinMax(max, /*is_min*/ false);
4222 }
4223 
4224 void LocationsBuilderX86_64::VisitAbs(HAbs* abs) {
4225   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
4226   switch (abs->GetResultType()) {
4227     case DataType::Type::kInt32:
4228     case DataType::Type::kInt64:
4229       locations->SetInAt(0, Location::RequiresRegister());
4230       locations->SetOut(Location::SameAsFirstInput());
4231       locations->AddTemp(Location::RequiresRegister());
4232       break;
4233     case DataType::Type::kFloat32:
4234     case DataType::Type::kFloat64:
4235       locations->SetInAt(0, Location::RequiresFpuRegister());
4236       locations->SetOut(Location::SameAsFirstInput());
4237       locations->AddTemp(Location::RequiresFpuRegister());
4238       break;
4239     default:
4240       LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4241   }
4242 }
4243 
4244 void InstructionCodeGeneratorX86_64::VisitAbs(HAbs* abs) {
4245   LocationSummary* locations = abs->GetLocations();
4246   switch (abs->GetResultType()) {
4247     case DataType::Type::kInt32: {
4248       CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4249       CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();
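      // Branch-free abs: mask = x >> 31 (0 or -1), result = (x + mask) ^ mask. Illustrative
      // arithmetic (not emitted code): x == -5 -> mask == -1, (-5 + -1) ^ -1 == 5; x == 5 ->
      // mask == 0 and the value is unchanged. The kInt64 case below does the same with 63.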
4250       // Create mask.
4251       __ movl(mask, out);
4252       __ sarl(mask, Immediate(31));
4253       // Add mask.
4254       __ addl(out, mask);
4255       __ xorl(out, mask);
4256       break;
4257     }
4258     case DataType::Type::kInt64: {
4259       CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4260       CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();
4261       // Create mask.
4262       __ movq(mask, out);
4263       __ sarq(mask, Immediate(63));
4264       // Add mask.
4265       __ addq(out, mask);
4266       __ xorq(out, mask);
4267       break;
4268     }
4269     case DataType::Type::kFloat32: {
4270       XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4271       XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4272       __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
4273       __ andps(out, mask);
4274       break;
4275     }
4276     case DataType::Type::kFloat64: {
4277       XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4278       XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4279       __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
4280       __ andpd(out, mask);
4281       break;
4282     }
4283     default:
4284       LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4285   }
4286 }
4287 
4288 void LocationsBuilderX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4289   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
4290   locations->SetInAt(0, Location::Any());
4291 }
4292 
4293 void InstructionCodeGeneratorX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4294   SlowPathCode* slow_path =
4295       new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathX86_64(instruction);
4296   codegen_->AddSlowPath(slow_path);
4297 
4298   LocationSummary* locations = instruction->GetLocations();
4299   Location value = locations->InAt(0);
4300 
4301   switch (instruction->GetType()) {
4302     case DataType::Type::kBool:
4303     case DataType::Type::kUint8:
4304     case DataType::Type::kInt8:
4305     case DataType::Type::kUint16:
4306     case DataType::Type::kInt16:
4307     case DataType::Type::kInt32: {
4308       if (value.IsRegister()) {
4309         __ testl(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
4310         __ j(kEqual, slow_path->GetEntryLabel());
4311       } else if (value.IsStackSlot()) {
4312         __ cmpl(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
4313         __ j(kEqual, slow_path->GetEntryLabel());
4314       } else {
4315         DCHECK(value.IsConstant()) << value;
4316         if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
4317           __ jmp(slow_path->GetEntryLabel());
4318         }
4319       }
4320       break;
4321     }
4322     case DataType::Type::kInt64: {
4323       if (value.IsRegister()) {
4324         __ testq(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
4325         __ j(kEqual, slow_path->GetEntryLabel());
4326       } else if (value.IsDoubleStackSlot()) {
4327         __ cmpq(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
4328         __ j(kEqual, slow_path->GetEntryLabel());
4329       } else {
4330         DCHECK(value.IsConstant()) << value;
4331         if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
4332           __ jmp(slow_path->GetEntryLabel());
4333         }
4334       }
4335       break;
4336     }
4337     default:
4338       LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
4339   }
4340 }
4341 
4342 void LocationsBuilderX86_64::HandleShift(HBinaryOperation* op) {
4343   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4344 
4345   LocationSummary* locations =
4346       new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall);
4347 
4348   switch (op->GetResultType()) {
4349     case DataType::Type::kInt32:
4350     case DataType::Type::kInt64: {
4351       locations->SetInAt(0, Location::RequiresRegister());
4352       // The shift count needs to be in CL.
4353       locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, op->InputAt(1)));
4354       locations->SetOut(Location::SameAsFirstInput());
4355       break;
4356     }
4357     default:
4358       LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
4359   }
4360 }
4361 
4362 void InstructionCodeGeneratorX86_64::HandleShift(HBinaryOperation* op) {
4363   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4364 
4365   LocationSummary* locations = op->GetLocations();
4366   CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
4367   Location second = locations->InAt(1);
4368 
4369   switch (op->GetResultType()) {
4370     case DataType::Type::kInt32: {
4371       if (second.IsRegister()) {
4372         CpuRegister second_reg = second.AsRegister<CpuRegister>();
4373         if (op->IsShl()) {
4374           __ shll(first_reg, second_reg);
4375         } else if (op->IsShr()) {
4376           __ sarl(first_reg, second_reg);
4377         } else {
4378           __ shrl(first_reg, second_reg);
4379         }
4380       } else {
4381         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
4382         if (op->IsShl()) {
4383           __ shll(first_reg, imm);
4384         } else if (op->IsShr()) {
4385           __ sarl(first_reg, imm);
4386         } else {
4387           __ shrl(first_reg, imm);
4388         }
4389       }
4390       break;
4391     }
4392     case DataType::Type::kInt64: {
4393       if (second.IsRegister()) {
4394         CpuRegister second_reg = second.AsRegister<CpuRegister>();
4395         if (op->IsShl()) {
4396           __ shlq(first_reg, second_reg);
4397         } else if (op->IsShr()) {
4398           __ sarq(first_reg, second_reg);
4399         } else {
4400           __ shrq(first_reg, second_reg);
4401         }
4402       } else {
4403         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
4404         if (op->IsShl()) {
4405           __ shlq(first_reg, imm);
4406         } else if (op->IsShr()) {
4407           __ sarq(first_reg, imm);
4408         } else {
4409           __ shrq(first_reg, imm);
4410         }
4411       }
4412       break;
4413     }
4414     default:
4415       LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
4416       UNREACHABLE();
4417   }
4418 }
4419 
4420 void LocationsBuilderX86_64::VisitRor(HRor* ror) {
4421   LocationSummary* locations =
4422       new (GetGraph()->GetAllocator()) LocationSummary(ror, LocationSummary::kNoCall);
4423 
4424   switch (ror->GetResultType()) {
4425     case DataType::Type::kInt32:
4426     case DataType::Type::kInt64: {
4427       locations->SetInAt(0, Location::RequiresRegister());
4428       // The shift count needs to be in CL (unless it is a constant).
4429       locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, ror->InputAt(1)));
4430       locations->SetOut(Location::SameAsFirstInput());
4431       break;
4432     }
4433     default:
4434       LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
4435       UNREACHABLE();
4436   }
4437 }
4438 
4439 void InstructionCodeGeneratorX86_64::VisitRor(HRor* ror) {
4440   LocationSummary* locations = ror->GetLocations();
4441   CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
4442   Location second = locations->InAt(1);
4443 
4444   switch (ror->GetResultType()) {
4445     case DataType::Type::kInt32:
4446       if (second.IsRegister()) {
4447         CpuRegister second_reg = second.AsRegister<CpuRegister>();
4448         __ rorl(first_reg, second_reg);
4449       } else {
4450         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
4451         __ rorl(first_reg, imm);
4452       }
4453       break;
4454     case DataType::Type::kInt64:
4455       if (second.IsRegister()) {
4456         CpuRegister second_reg = second.AsRegister<CpuRegister>();
4457         __ rorq(first_reg, second_reg);
4458       } else {
4459         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
4460         __ rorq(first_reg, imm);
4461       }
4462       break;
4463     default:
4464       LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
4465       UNREACHABLE();
4466   }
4467 }
4468 
4469 void LocationsBuilderX86_64::VisitShl(HShl* shl) {
4470   HandleShift(shl);
4471 }
4472 
4473 void InstructionCodeGeneratorX86_64::VisitShl(HShl* shl) {
4474   HandleShift(shl);
4475 }
4476 
4477 void LocationsBuilderX86_64::VisitShr(HShr* shr) {
4478   HandleShift(shr);
4479 }
4480 
4481 void InstructionCodeGeneratorX86_64::VisitShr(HShr* shr) {
4482   HandleShift(shr);
4483 }
4484 
4485 void LocationsBuilderX86_64::VisitUShr(HUShr* ushr) {
4486   HandleShift(ushr);
4487 }
4488 
4489 void InstructionCodeGeneratorX86_64::VisitUShr(HUShr* ushr) {
4490   HandleShift(ushr);
4491 }
4492 
4493 void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
4494   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
4495       instruction, LocationSummary::kCallOnMainOnly);
4496   InvokeRuntimeCallingConvention calling_convention;
4497   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
4498   locations->SetOut(Location::RegisterLocation(RAX));
4499 }
4500 
4501 void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) {
4502   codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
4503   CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
4504   DCHECK(!codegen_->IsLeafMethod());
4505 }
4506 
4507 void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) {
4508   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
4509       instruction, LocationSummary::kCallOnMainOnly);
4510   InvokeRuntimeCallingConvention calling_convention;
4511   locations->SetOut(Location::RegisterLocation(RAX));
4512   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
4513   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
4514 }
4515 
4516 void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) {
4517   // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
4518   QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
4519   codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
4520   CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
4521   DCHECK(!codegen_->IsLeafMethod());
4522 }
4523 
4524 void LocationsBuilderX86_64::VisitParameterValue(HParameterValue* instruction) {
4525   LocationSummary* locations =
4526       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4527   Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
4528   if (location.IsStackSlot()) {
4529     location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
4530   } else if (location.IsDoubleStackSlot()) {
4531     location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
4532   }
4533   locations->SetOut(location);
4534 }
4535 
4536 void InstructionCodeGeneratorX86_64::VisitParameterValue(
4537     HParameterValue* instruction ATTRIBUTE_UNUSED) {
4538   // Nothing to do, the parameter is already at its location.
4539 }
4540 
4541 void LocationsBuilderX86_64::VisitCurrentMethod(HCurrentMethod* instruction) {
4542   LocationSummary* locations =
4543       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4544   locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument));
4545 }
4546 
4547 void InstructionCodeGeneratorX86_64::VisitCurrentMethod(
4548     HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
4549   // Nothing to do, the method is already at its location.
4550 }
4551 
4552 void LocationsBuilderX86_64::VisitClassTableGet(HClassTableGet* instruction) {
4553   LocationSummary* locations =
4554       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4555   locations->SetInAt(0, Location::RequiresRegister());
4556   locations->SetOut(Location::RequiresRegister());
4557 }
4558 
4559 void InstructionCodeGeneratorX86_64::VisitClassTableGet(HClassTableGet* instruction) {
4560   LocationSummary* locations = instruction->GetLocations();
4561   if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
4562     uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
4563         instruction->GetIndex(), kX86_64PointerSize).SizeValue();
4564     __ movq(locations->Out().AsRegister<CpuRegister>(),
4565             Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset));
4566   } else {
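    // The non-vtable case goes through the IMT: load the ImTable pointer from the class first,
    // then the method entry at the precomputed offset within that table.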
4567     uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
4568         instruction->GetIndex(), kX86_64PointerSize));
4569     __ movq(locations->Out().AsRegister<CpuRegister>(),
4570             Address(locations->InAt(0).AsRegister<CpuRegister>(),
4571             mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
4572     __ movq(locations->Out().AsRegister<CpuRegister>(),
4573             Address(locations->Out().AsRegister<CpuRegister>(), method_offset));
4574   }
4575 }
4576 
4577 void LocationsBuilderX86_64::VisitNot(HNot* not_) {
4578   LocationSummary* locations =
4579       new (GetGraph()->GetAllocator()) LocationSummary(not_, LocationSummary::kNoCall);
4580   locations->SetInAt(0, Location::RequiresRegister());
4581   locations->SetOut(Location::SameAsFirstInput());
4582 }
4583 
4584 void InstructionCodeGeneratorX86_64::VisitNot(HNot* not_) {
4585   LocationSummary* locations = not_->GetLocations();
4586   DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
4587             locations->Out().AsRegister<CpuRegister>().AsRegister());
4588   Location out = locations->Out();
4589   switch (not_->GetResultType()) {
4590     case DataType::Type::kInt32:
4591       __ notl(out.AsRegister<CpuRegister>());
4592       break;
4593 
4594     case DataType::Type::kInt64:
4595       __ notq(out.AsRegister<CpuRegister>());
4596       break;
4597 
4598     default:
4599       LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
4600   }
4601 }
4602 
4603 void LocationsBuilderX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
4604   LocationSummary* locations =
4605       new (GetGraph()->GetAllocator()) LocationSummary(bool_not, LocationSummary::kNoCall);
4606   locations->SetInAt(0, Location::RequiresRegister());
4607   locations->SetOut(Location::SameAsFirstInput());
4608 }
4609 
4610 void InstructionCodeGeneratorX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
4611   LocationSummary* locations = bool_not->GetLocations();
4612   DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
4613             locations->Out().AsRegister<CpuRegister>().AsRegister());
4614   Location out = locations->Out();
4615   __ xorl(out.AsRegister<CpuRegister>(), Immediate(1));
4616 }
4617 
4618 void LocationsBuilderX86_64::VisitPhi(HPhi* instruction) {
4619   LocationSummary* locations =
4620       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4621   for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
4622     locations->SetInAt(i, Location::Any());
4623   }
4624   locations->SetOut(Location::Any());
4625 }
4626 
4627 void InstructionCodeGeneratorX86_64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
4628   LOG(FATAL) << "Unimplemented";
4629 }
4630 
4631 void CodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) {
4632   /*
4633    * According to the JSR-133 Cookbook, for x86-64 only StoreLoad/AnyAny barriers need a memory fence.
4634    * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86-64 memory model.
4635    * For those cases, all we need to ensure is that there is a scheduling barrier in place.
4636    */
4637   switch (kind) {
4638     case MemBarrierKind::kAnyAny: {
4639       MemoryFence();
4640       break;
4641     }
4642     case MemBarrierKind::kAnyStore:
4643     case MemBarrierKind::kLoadAny:
4644     case MemBarrierKind::kStoreStore: {
4645       // nop
4646       break;
4647     }
4648     case MemBarrierKind::kNTStoreStore:
4649       // Non-Temporal Store/Store needs an explicit fence.
4650       MemoryFence(/* non-temporal= */ true);
4651       break;
4652   }
4653 }
4654 
4655 void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) {
4656   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
4657 
4658   bool object_field_get_with_read_barrier =
4659       kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
4660   LocationSummary* locations =
4661       new (GetGraph()->GetAllocator()) LocationSummary(instruction,
4662                                                        object_field_get_with_read_barrier
4663                                                            ? LocationSummary::kCallOnSlowPath
4664                                                            : LocationSummary::kNoCall);
4665   if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
4666     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
4667   }
4668   locations->SetInAt(0, Location::RequiresRegister());
4669   if (DataType::IsFloatingPointType(instruction->GetType())) {
4670     locations->SetOut(Location::RequiresFpuRegister());
4671   } else {
4672     // The output overlaps for an object field get when read barriers
4673     // are enabled: we do not want the move to overwrite the object's
4674     // location, as we need it to emit the read barrier.
4675     locations->SetOut(
4676         Location::RequiresRegister(),
4677         object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
4678   }
4679 }
4680 
4681 void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
4682                                                     const FieldInfo& field_info) {
4683   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
4684 
4685   LocationSummary* locations = instruction->GetLocations();
4686   Location base_loc = locations->InAt(0);
4687   CpuRegister base = base_loc.AsRegister<CpuRegister>();
4688   Location out = locations->Out();
4689   bool is_volatile = field_info.IsVolatile();
4690   DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
4691   DataType::Type load_type = instruction->GetType();
4692   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
4693 
4694   switch (load_type) {
4695     case DataType::Type::kBool:
4696     case DataType::Type::kUint8: {
4697       __ movzxb(out.AsRegister<CpuRegister>(), Address(base, offset));
4698       break;
4699     }
4700 
4701     case DataType::Type::kInt8: {
4702       __ movsxb(out.AsRegister<CpuRegister>(), Address(base, offset));
4703       break;
4704     }
4705 
4706     case DataType::Type::kUint16: {
4707       __ movzxw(out.AsRegister<CpuRegister>(), Address(base, offset));
4708       break;
4709     }
4710 
4711     case DataType::Type::kInt16: {
4712       __ movsxw(out.AsRegister<CpuRegister>(), Address(base, offset));
4713       break;
4714     }
4715 
4716     case DataType::Type::kInt32: {
4717       __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
4718       break;
4719     }
4720 
4721     case DataType::Type::kReference: {
4722       // /* HeapReference<Object> */ out = *(base + offset)
4723       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
4724         // Note that a potential implicit null check is handled in this
4725         // CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call.
4726         codegen_->GenerateFieldLoadWithBakerReadBarrier(
4727             instruction, out, base, offset, /* needs_null_check= */ true);
4728         if (is_volatile) {
4729           codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4730         }
4731       } else {
4732         __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
4733         codegen_->MaybeRecordImplicitNullCheck(instruction);
4734         if (is_volatile) {
4735           codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4736         }
4737         // If read barriers are enabled, emit read barriers other than
4738         // Baker's using a slow path (and also unpoison the loaded
4739         // reference, if heap poisoning is enabled).
4740         codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
4741       }
4742       break;
4743     }
4744 
4745     case DataType::Type::kInt64: {
4746       __ movq(out.AsRegister<CpuRegister>(), Address(base, offset));
4747       break;
4748     }
4749 
4750     case DataType::Type::kFloat32: {
4751       __ movss(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
4752       break;
4753     }
4754 
4755     case DataType::Type::kFloat64: {
4756       __ movsd(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
4757       break;
4758     }
4759 
4760     case DataType::Type::kUint32:
4761     case DataType::Type::kUint64:
4762     case DataType::Type::kVoid:
4763       LOG(FATAL) << "Unreachable type " << load_type;
4764       UNREACHABLE();
4765   }
4766 
4767   if (load_type == DataType::Type::kReference) {
4768     // Potential implicit null checks, in the case of reference
4769     // fields, are handled in the previous switch statement.
4770   } else {
4771     codegen_->MaybeRecordImplicitNullCheck(instruction);
4772   }
4773 
4774   if (is_volatile) {
4775     if (load_type == DataType::Type::kReference) {
4776       // Memory barriers, in the case of references, are also handled
4777       // in the previous switch statement.
4778     } else {
4779       codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4780     }
4781   }
4782 }
4783 
4784 void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction,
4785                                             const FieldInfo& field_info) {
4786   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
4787 
4788   LocationSummary* locations =
4789       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4790   DataType::Type field_type = field_info.GetFieldType();
4791   bool is_volatile = field_info.IsVolatile();
4792   bool needs_write_barrier =
4793       CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
4794 
4795   locations->SetInAt(0, Location::RequiresRegister());
4796   if (DataType::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
4797     if (is_volatile) {
4798       // In order to satisfy the semantics of volatile, this must be a single instruction store.
4799       locations->SetInAt(1, Location::FpuRegisterOrInt32Constant(instruction->InputAt(1)));
4800     } else {
4801       locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
4802     }
4803   } else {
4804     if (is_volatile) {
4805       // In order to satisfy the semantics of volatile, this must be a single instruction store.
4806       locations->SetInAt(1, Location::RegisterOrInt32Constant(instruction->InputAt(1)));
4807     } else {
4808       locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
4809     }
4810   }
4811   if (needs_write_barrier) {
4812     // Temporary registers for the write barrier.
4813     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
4814     locations->AddTemp(Location::RequiresRegister());
4815   } else if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
4816     // Temporary register for the reference poisoning.
4817     locations->AddTemp(Location::RequiresRegister());
4818   }
4819 }
4820 
4821 void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
4822                                                     const FieldInfo& field_info,
4823                                                     bool value_can_be_null) {
4824   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
4825 
4826   LocationSummary* locations = instruction->GetLocations();
4827   CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>();
4828   Location value = locations->InAt(1);
4829   bool is_volatile = field_info.IsVolatile();
4830   DataType::Type field_type = field_info.GetFieldType();
4831   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
4832 
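  // Volatile stores are bracketed by barriers: an AnyStore barrier before the store and an
  // AnyAny barrier after it (see GenerateMemoryBarrier); on x86-64 only the trailing AnyAny
  // barrier expands to an actual fence.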
4833   if (is_volatile) {
4834     codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
4835   }
4836 
4837   bool maybe_record_implicit_null_check_done = false;
4838 
4839   switch (field_type) {
4840     case DataType::Type::kBool:
4841     case DataType::Type::kUint8:
4842     case DataType::Type::kInt8: {
4843       if (value.IsConstant()) {
4844         __ movb(Address(base, offset),
4845                 Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
4846       } else {
4847         __ movb(Address(base, offset), value.AsRegister<CpuRegister>());
4848       }
4849       break;
4850     }
4851 
4852     case DataType::Type::kUint16:
4853     case DataType::Type::kInt16: {
4854       if (value.IsConstant()) {
4855         __ movw(Address(base, offset),
4856                 Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
4857       } else {
4858         __ movw(Address(base, offset), value.AsRegister<CpuRegister>());
4859       }
4860       break;
4861     }
4862 
4863     case DataType::Type::kInt32:
4864     case DataType::Type::kReference: {
4865       if (value.IsConstant()) {
4866         int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
4867         // `field_type == DataType::Type::kReference` implies `v == 0`.
4868         DCHECK((field_type != DataType::Type::kReference) || (v == 0));
4869         // Note: if heap poisoning is enabled, no need to poison
4870         // (negate) `v` if it is a reference, as it would be null.
4871         __ movl(Address(base, offset), Immediate(v));
4872       } else {
4873         if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
4874           CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
4875           __ movl(temp, value.AsRegister<CpuRegister>());
4876           __ PoisonHeapReference(temp);
4877           __ movl(Address(base, offset), temp);
4878         } else {
4879           __ movl(Address(base, offset), value.AsRegister<CpuRegister>());
4880         }
4881       }
4882       break;
4883     }
4884 
4885     case DataType::Type::kInt64: {
4886       if (value.IsConstant()) {
4887         int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
4888         codegen_->MoveInt64ToAddress(Address(base, offset),
4889                                      Address(base, offset + sizeof(int32_t)),
4890                                      v,
4891                                      instruction);
4892         maybe_record_implicit_null_check_done = true;
4893       } else {
4894         __ movq(Address(base, offset), value.AsRegister<CpuRegister>());
4895       }
4896       break;
4897     }
4898 
4899     case DataType::Type::kFloat32: {
4900       if (value.IsConstant()) {
4901         int32_t v =
4902             bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
4903         __ movl(Address(base, offset), Immediate(v));
4904       } else {
4905         __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>());
4906       }
4907       break;
4908     }
4909 
4910     case DataType::Type::kFloat64: {
4911       if (value.IsConstant()) {
4912         int64_t v =
4913             bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
4914         codegen_->MoveInt64ToAddress(Address(base, offset),
4915                                      Address(base, offset + sizeof(int32_t)),
4916                                      v,
4917                                      instruction);
4918         maybe_record_implicit_null_check_done = true;
4919       } else {
4920         __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>());
4921       }
4922       break;
4923     }
4924 
4925     case DataType::Type::kUint32:
4926     case DataType::Type::kUint64:
4927     case DataType::Type::kVoid:
4928       LOG(FATAL) << "Unreachable type " << field_type;
4929       UNREACHABLE();
4930   }
4931 
4932   if (!maybe_record_implicit_null_check_done) {
4933     codegen_->MaybeRecordImplicitNullCheck(instruction);
4934   }
4935 
4936   if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
4937     CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
4938     CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
4939     codegen_->MarkGCCard(temp, card, base, value.AsRegister<CpuRegister>(), value_can_be_null);
4940   }
4941 
4942   if (is_volatile) {
4943     codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
4944   }
4945 }
4946 
4947 void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
4948   HandleFieldSet(instruction, instruction->GetFieldInfo());
4949 }
4950 
4951 void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
4952   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
4953 }
4954 
4955 void LocationsBuilderX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
4956   HandleFieldGet(instruction);
4957 }
4958 
4959 void InstructionCodeGeneratorX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
4960   HandleFieldGet(instruction, instruction->GetFieldInfo());
4961 }
4962 
4963 void LocationsBuilderX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
4964   HandleFieldGet(instruction);
4965 }
4966 
4967 void InstructionCodeGeneratorX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
4968   HandleFieldGet(instruction, instruction->GetFieldInfo());
4969 }
4970 
4971 void LocationsBuilderX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
4972   HandleFieldSet(instruction, instruction->GetFieldInfo());
4973 }
4974 
4975 void InstructionCodeGeneratorX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
4976   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
4977 }
4978 
4979 void LocationsBuilderX86_64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
4980   codegen_->CreateStringBuilderAppendLocations(instruction, Location::RegisterLocation(RAX));
4981 }
4982 
4983 void InstructionCodeGeneratorX86_64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
4984   __ movl(CpuRegister(RDI), Immediate(instruction->GetFormat()->GetValue()));
4985   codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc());
4986 }
4987 
4988 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldGet(
4989     HUnresolvedInstanceFieldGet* instruction) {
4990   FieldAccessCallingConventionX86_64 calling_convention;
4991   codegen_->CreateUnresolvedFieldLocationSummary(
4992       instruction, instruction->GetFieldType(), calling_convention);
4993 }
4994 
4995 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldGet(
4996     HUnresolvedInstanceFieldGet* instruction) {
4997   FieldAccessCallingConventionX86_64 calling_convention;
4998   codegen_->GenerateUnresolvedFieldAccess(instruction,
4999                                           instruction->GetFieldType(),
5000                                           instruction->GetFieldIndex(),
5001                                           instruction->GetDexPc(),
5002                                           calling_convention);
5003 }
5004 
5005 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldSet(
5006     HUnresolvedInstanceFieldSet* instruction) {
5007   FieldAccessCallingConventionX86_64 calling_convention;
5008   codegen_->CreateUnresolvedFieldLocationSummary(
5009       instruction, instruction->GetFieldType(), calling_convention);
5010 }
5011 
5012 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldSet(
5013     HUnresolvedInstanceFieldSet* instruction) {
5014   FieldAccessCallingConventionX86_64 calling_convention;
5015   codegen_->GenerateUnresolvedFieldAccess(instruction,
5016                                           instruction->GetFieldType(),
5017                                           instruction->GetFieldIndex(),
5018                                           instruction->GetDexPc(),
5019                                           calling_convention);
5020 }
5021 
5022 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldGet(
5023     HUnresolvedStaticFieldGet* instruction) {
5024   FieldAccessCallingConventionX86_64 calling_convention;
5025   codegen_->CreateUnresolvedFieldLocationSummary(
5026       instruction, instruction->GetFieldType(), calling_convention);
5027 }
5028 
5029 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldGet(
5030     HUnresolvedStaticFieldGet* instruction) {
5031   FieldAccessCallingConventionX86_64 calling_convention;
5032   codegen_->GenerateUnresolvedFieldAccess(instruction,
5033                                           instruction->GetFieldType(),
5034                                           instruction->GetFieldIndex(),
5035                                           instruction->GetDexPc(),
5036                                           calling_convention);
5037 }
5038 
5039 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldSet(
5040     HUnresolvedStaticFieldSet* instruction) {
5041   FieldAccessCallingConventionX86_64 calling_convention;
5042   codegen_->CreateUnresolvedFieldLocationSummary(
5043       instruction, instruction->GetFieldType(), calling_convention);
5044 }
5045 
5046 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldSet(
5047     HUnresolvedStaticFieldSet* instruction) {
5048   FieldAccessCallingConventionX86_64 calling_convention;
5049   codegen_->GenerateUnresolvedFieldAccess(instruction,
5050                                           instruction->GetFieldType(),
5051                                           instruction->GetFieldIndex(),
5052                                           instruction->GetDexPc(),
5053                                           calling_convention);
5054 }
5055 
5056 void LocationsBuilderX86_64::VisitNullCheck(HNullCheck* instruction) {
5057   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
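  // Implicit null checks dereference the object directly, so it must live in a register;
  // explicit checks can test any location (register, stack slot, or the null constant).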
5058   Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks()
5059       ? Location::RequiresRegister()
5060       : Location::Any();
5061   locations->SetInAt(0, loc);
5062 }
5063 
5064 void CodeGeneratorX86_64::GenerateImplicitNullCheck(HNullCheck* instruction) {
5065   if (CanMoveNullCheckToUser(instruction)) {
5066     return;
5067   }
5068   LocationSummary* locations = instruction->GetLocations();
5069   Location obj = locations->InAt(0);
5070 
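  // Perform a dummy load from the object: if `obj` is null, the access faults and the
  // runtime's fault handler turns it into a NullPointerException. RAX is only a convenient
  // destination; the loaded value is ignored.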
5071   __ testl(CpuRegister(RAX), Address(obj.AsRegister<CpuRegister>(), 0));
5072   RecordPcInfo(instruction, instruction->GetDexPc());
5073 }
5074 
5075 void CodeGeneratorX86_64::GenerateExplicitNullCheck(HNullCheck* instruction) {
5076   SlowPathCode* slow_path = new (GetScopedAllocator()) NullCheckSlowPathX86_64(instruction);
5077   AddSlowPath(slow_path);
5078 
5079   LocationSummary* locations = instruction->GetLocations();
5080   Location obj = locations->InAt(0);
5081 
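  // Test for null according to where the register allocator placed the object: a register is
  // tested against itself, a stack slot is compared with zero in memory, and a literal null
  // constant simply jumps to the slow path.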
5082   if (obj.IsRegister()) {
5083     __ testl(obj.AsRegister<CpuRegister>(), obj.AsRegister<CpuRegister>());
5084   } else if (obj.IsStackSlot()) {
5085     __ cmpl(Address(CpuRegister(RSP), obj.GetStackIndex()), Immediate(0));
5086   } else {
5087     DCHECK(obj.IsConstant()) << obj;
5088     DCHECK(obj.GetConstant()->IsNullConstant());
5089     __ jmp(slow_path->GetEntryLabel());
5090     return;
5091   }
5092   __ j(kEqual, slow_path->GetEntryLabel());
5093 }
5094 
5095 void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) {
5096   codegen_->GenerateNullCheck(instruction);
5097 }
5098 
5099 void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) {
5100   bool object_array_get_with_read_barrier =
5101       kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
5102   LocationSummary* locations =
5103       new (GetGraph()->GetAllocator()) LocationSummary(instruction,
5104                                                        object_array_get_with_read_barrier
5105                                                            ? LocationSummary::kCallOnSlowPath
5106                                                            : LocationSummary::kNoCall);
5107   if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
5108     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
5109   }
5110   locations->SetInAt(0, Location::RequiresRegister());
5111   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5112   if (DataType::IsFloatingPointType(instruction->GetType())) {
5113     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5114   } else {
5115     // The output overlaps for an object array get when read barriers
5116     // are enabled: we do not want the move to overwrite the array's
5117     // location, as we need it to emit the read barrier.
5118     locations->SetOut(
5119         Location::RequiresRegister(),
5120         object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
5121   }
5122 }
5123 
5124 void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
5125   LocationSummary* locations = instruction->GetLocations();
5126   Location obj_loc = locations->InAt(0);
5127   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
5128   Location index = locations->InAt(1);
5129   Location out_loc = locations->Out();
5130   uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
5131 
5132   DataType::Type type = instruction->GetType();
5133   switch (type) {
5134     case DataType::Type::kBool:
5135     case DataType::Type::kUint8: {
5136       CpuRegister out = out_loc.AsRegister<CpuRegister>();
5137       __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
5138       break;
5139     }
5140 
5141     case DataType::Type::kInt8: {
5142       CpuRegister out = out_loc.AsRegister<CpuRegister>();
5143       __ movsxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
5144       break;
5145     }
5146 
5147     case DataType::Type::kUint16: {
5148       CpuRegister out = out_loc.AsRegister<CpuRegister>();
5149       if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
5150         // Branch into the compressed or the uncompressed case depending on the string's compression flag.
5151         uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
5152         NearLabel done, not_compressed;
5153         __ testb(Address(obj, count_offset), Immediate(1));
5154         codegen_->MaybeRecordImplicitNullCheck(instruction);
5155         static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
5156                       "Expecting 0=compressed, 1=uncompressed");
5157         __ j(kNotZero, &not_compressed);
5158         __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
5159         __ jmp(&done);
5160         __ Bind(&not_compressed);
5161         __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
5162         __ Bind(&done);
5163       } else {
5164         __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
5165       }
5166       break;
5167     }
5168 
5169     case DataType::Type::kInt16: {
5170       CpuRegister out = out_loc.AsRegister<CpuRegister>();
5171       __ movsxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
5172       break;
5173     }
5174 
5175     case DataType::Type::kInt32: {
5176       CpuRegister out = out_loc.AsRegister<CpuRegister>();
5177       __ movl(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
5178       break;
5179     }
5180 
5181     case DataType::Type::kReference: {
5182       static_assert(
5183           sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
5184           "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
5185       // /* HeapReference<Object> */ out =
5186       //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
5187       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
5188         // Note that a potential implicit null check is handled in this
5189         // CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call.
5190         codegen_->GenerateArrayLoadWithBakerReadBarrier(
5191             instruction, out_loc, obj, data_offset, index, /* needs_null_check= */ true);
5192       } else {
5193         CpuRegister out = out_loc.AsRegister<CpuRegister>();
5194         __ movl(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
5195         codegen_->MaybeRecordImplicitNullCheck(instruction);
5196         // If read barriers are enabled, emit read barriers other than
5197         // Baker's using a slow path (and also unpoison the loaded
5198         // reference, if heap poisoning is enabled).
5199         if (index.IsConstant()) {
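          // TIMES_4 is the log2 of the scale, so shifting the constant index left by it
          // multiplies by the 4-byte reference size.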
5200           uint32_t offset =
5201               (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
5202           codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
5203         } else {
5204           codegen_->MaybeGenerateReadBarrierSlow(
5205               instruction, out_loc, out_loc, obj_loc, data_offset, index);
5206         }
5207       }
5208       break;
5209     }
5210 
5211     case DataType::Type::kInt64: {
5212       CpuRegister out = out_loc.AsRegister<CpuRegister>();
5213       __ movq(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_8, data_offset));
5214       break;
5215     }
5216 
5217     case DataType::Type::kFloat32: {
5218       XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
5219       __ movss(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
5220       break;
5221     }
5222 
5223     case DataType::Type::kFloat64: {
5224       XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
5225       __ movsd(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_8, data_offset));
5226       break;
5227     }
5228 
5229     case DataType::Type::kUint32:
5230     case DataType::Type::kUint64:
5231     case DataType::Type::kVoid:
5232       LOG(FATAL) << "Unreachable type " << type;
5233       UNREACHABLE();
5234   }
5235 
5236   if (type == DataType::Type::kReference) {
5237     // Potential implicit null checks, in the case of reference
5238     // arrays, are handled in the previous switch statement.
5239   } else {
5240     codegen_->MaybeRecordImplicitNullCheck(instruction);
5241   }
5242 }
5243 
5244 void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) {
5245   DataType::Type value_type = instruction->GetComponentType();
5246 
5247   bool needs_write_barrier =
5248       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
5249   bool needs_type_check = instruction->NeedsTypeCheck();
5250 
5251   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5252       instruction,
5253       needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
5254 
5255   locations->SetInAt(0, Location::RequiresRegister());
5256   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5257   if (DataType::IsFloatingPointType(value_type)) {
5258     locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
5259   } else {
5260     locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
5261   }
5262 
5263   if (needs_write_barrier) {
5264     // Temporary registers for the write barrier.
5265     locations->AddTemp(Location::RequiresRegister());  // Possibly used for ref. poisoning too.
5266     locations->AddTemp(Location::RequiresRegister());
5267   }
5268 }
5269 
5270 void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
5271   LocationSummary* locations = instruction->GetLocations();
5272   Location array_loc = locations->InAt(0);
5273   CpuRegister array = array_loc.AsRegister<CpuRegister>();
5274   Location index = locations->InAt(1);
5275   Location value = locations->InAt(2);
5276   DataType::Type value_type = instruction->GetComponentType();
5277   bool needs_type_check = instruction->NeedsTypeCheck();
5278   bool needs_write_barrier =
5279       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
5280 
5281   switch (value_type) {
5282     case DataType::Type::kBool:
5283     case DataType::Type::kUint8:
5284     case DataType::Type::kInt8: {
5285       uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
5286       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_1, offset);
5287       if (value.IsRegister()) {
5288         __ movb(address, value.AsRegister<CpuRegister>());
5289       } else {
5290         __ movb(address, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
5291       }
5292       codegen_->MaybeRecordImplicitNullCheck(instruction);
5293       break;
5294     }
5295 
5296     case DataType::Type::kUint16:
5297     case DataType::Type::kInt16: {
5298       uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
5299       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_2, offset);
5300       if (value.IsRegister()) {
5301         __ movw(address, value.AsRegister<CpuRegister>());
5302       } else {
5303         DCHECK(value.IsConstant()) << value;
5304         __ movw(address, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
5305       }
5306       codegen_->MaybeRecordImplicitNullCheck(instruction);
5307       break;
5308     }
5309 
5310     case DataType::Type::kReference: {
5311       uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
5312       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
5313 
5314       if (!value.IsRegister()) {
5315         // Just setting null.
5316         DCHECK(instruction->InputAt(2)->IsNullConstant());
5317         DCHECK(value.IsConstant()) << value;
5318         __ movl(address, Immediate(0));
5319         codegen_->MaybeRecordImplicitNullCheck(instruction);
5320         DCHECK(!needs_write_barrier);
5321         DCHECK(!needs_type_check);
5322         break;
5323       }
5324 
5325       DCHECK(needs_write_barrier);
5326       CpuRegister register_value = value.AsRegister<CpuRegister>();
5327       Location temp_loc = locations->GetTemp(0);
5328       CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
5329 
5330       bool can_value_be_null = instruction->GetValueCanBeNull();
5331       NearLabel do_store;
5332       if (can_value_be_null) {
5333         __ testl(register_value, register_value);
5334         __ j(kEqual, &do_store);
5335       }
5336 
5337       SlowPathCode* slow_path = nullptr;
5338       if (needs_type_check) {
5339         slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathX86_64(instruction);
5340         codegen_->AddSlowPath(slow_path);
5341 
5342         const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
5343         const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
5344         const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
5345 
5346         // Note that when Baker read barriers are enabled, the type
5347         // checks are performed without read barriers.  This is fine,
5348         // even in the case where a class object is in the from-space
5349         // after the flip, as a comparison involving such a type would
5350         // not produce a false positive; it may of course produce a
5351         // false negative, in which case we would take the ArraySet
5352         // slow path.
5353 
5354         // /* HeapReference<Class> */ temp = array->klass_
5355         __ movl(temp, Address(array, class_offset));
5356         codegen_->MaybeRecordImplicitNullCheck(instruction);
5357         __ MaybeUnpoisonHeapReference(temp);
5358 
5359         // /* HeapReference<Class> */ temp = temp->component_type_
5360         __ movl(temp, Address(temp, component_offset));
5361         // If heap poisoning is enabled, no need to unpoison `temp`
5362         // nor the object reference in `register_value->klass`, as
5363         // we are comparing two poisoned references.
5364         __ cmpl(temp, Address(register_value, class_offset));
5365 
5366         if (instruction->StaticTypeOfArrayIsObjectArray()) {
5367           NearLabel do_put;
5368           __ j(kEqual, &do_put);
5369           // If heap poisoning is enabled, the `temp` reference has
5370           // not been unpoisoned yet; unpoison it now.
5371           __ MaybeUnpoisonHeapReference(temp);
5372 
5373           // If heap poisoning is enabled, no need to unpoison the
5374           // heap reference loaded below, as it is only used for a
5375           // comparison with null.
5376           __ cmpl(Address(temp, super_offset), Immediate(0));
5377           __ j(kNotEqual, slow_path->GetEntryLabel());
5378           __ Bind(&do_put);
5379         } else {
5380           __ j(kNotEqual, slow_path->GetEntryLabel());
5381         }
5382       }
5383 
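      // Null values branch directly to `do_store` (bound below) and therefore skip the card
      // mark entirely, which is why `value_can_be_null` is passed as false here.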
5384       CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
5385       codegen_->MarkGCCard(
5386           temp, card, array, value.AsRegister<CpuRegister>(), /* value_can_be_null= */ false);
5387 
5388       if (can_value_be_null) {
5389         DCHECK(do_store.IsLinked());
5390         __ Bind(&do_store);
5391       }
5392 
5393       Location source = value;
5394       if (kPoisonHeapReferences) {
5395         __ movl(temp, register_value);
5396         __ PoisonHeapReference(temp);
5397         source = temp_loc;
5398       }
5399 
5400       __ movl(address, source.AsRegister<CpuRegister>());
5401 
5402       if (can_value_be_null || !needs_type_check) {
5403         codegen_->MaybeRecordImplicitNullCheck(instruction);
5404       }
5405 
5406       if (slow_path != nullptr) {
5407         __ Bind(slow_path->GetExitLabel());
5408       }
5409 
5410       break;
5411     }
5412 
5413     case DataType::Type::kInt32: {
5414       uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
5415       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
5416       if (value.IsRegister()) {
5417         __ movl(address, value.AsRegister<CpuRegister>());
5418       } else {
5419         DCHECK(value.IsConstant()) << value;
5420         int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
5421         __ movl(address, Immediate(v));
5422       }
5423       codegen_->MaybeRecordImplicitNullCheck(instruction);
5424       break;
5425     }
5426 
5427     case DataType::Type::kInt64: {
5428       uint32_t offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
5429       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset);
5430       if (value.IsRegister()) {
5431         __ movq(address, value.AsRegister<CpuRegister>());
5432         codegen_->MaybeRecordImplicitNullCheck(instruction);
5433       } else {
5434         int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
5435         Address address_high =
5436             CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t));
5437         codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
5438       }
5439       break;
5440     }
5441 
5442     case DataType::Type::kFloat32: {
5443       uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
5444       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
5445       if (value.IsFpuRegister()) {
5446         __ movss(address, value.AsFpuRegister<XmmRegister>());
5447       } else {
5448         DCHECK(value.IsConstant());
5449         int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
5450         __ movl(address, Immediate(v));
5451       }
5452       codegen_->MaybeRecordImplicitNullCheck(instruction);
5453       break;
5454     }
5455 
5456     case DataType::Type::kFloat64: {
5457       uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
5458       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset);
5459       if (value.IsFpuRegister()) {
5460         __ movsd(address, value.AsFpuRegister<XmmRegister>());
5461         codegen_->MaybeRecordImplicitNullCheck(instruction);
5462       } else {
5463         int64_t v =
5464             bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
5465         Address address_high =
5466             CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t));
5467         codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
5468       }
5469       break;
5470     }
5471 
5472     case DataType::Type::kUint32:
5473     case DataType::Type::kUint64:
5474     case DataType::Type::kVoid:
5475       LOG(FATAL) << "Unreachable type " << instruction->GetType();
5476       UNREACHABLE();
5477   }
5478 }
5479 
5480 void LocationsBuilderX86_64::VisitArrayLength(HArrayLength* instruction) {
5481   LocationSummary* locations =
5482       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5483   locations->SetInAt(0, Location::RequiresRegister());
5484   if (!instruction->IsEmittedAtUseSite()) {
5485     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5486   }
5487 }
5488 
5489 void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction) {
5490   if (instruction->IsEmittedAtUseSite()) {
5491     return;
5492   }
5493 
5494   LocationSummary* locations = instruction->GetLocations();
5495   uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
5496   CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
5497   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
5498   __ movl(out, Address(obj, offset));
5499   codegen_->MaybeRecordImplicitNullCheck(instruction);
5500   // For a String.length(), shift out the compression flag stored in the least significant bit of the count field.
5501   if (mirror::kUseStringCompression && instruction->IsStringLength()) {
5502     __ shrl(out, Immediate(1));
5503   }
5504 }
5505 
5506 void LocationsBuilderX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
5507   RegisterSet caller_saves = RegisterSet::Empty();
5508   InvokeRuntimeCallingConvention calling_convention;
5509   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
5510   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
5511   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
5512   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
5513   HInstruction* length = instruction->InputAt(1);
5514   if (!length->IsEmittedAtUseSite()) {
5515     locations->SetInAt(1, Location::RegisterOrConstant(length));
5516   }
5517 }
5518 
5519 void InstructionCodeGeneratorX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
5520   LocationSummary* locations = instruction->GetLocations();
5521   Location index_loc = locations->InAt(0);
5522   Location length_loc = locations->InAt(1);
5523   SlowPathCode* slow_path =
5524       new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathX86_64(instruction);
5525 
5526   if (length_loc.IsConstant()) {
5527     int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
5528     if (index_loc.IsConstant()) {
5529       // BCE will remove the bounds check if we are guaranteed to pass.
5530       int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
5531       if (index < 0 || index >= length) {
5532         codegen_->AddSlowPath(slow_path);
5533         __ jmp(slow_path->GetEntryLabel());
5534       } else {
5535         // Some optimization after BCE may have generated this, and we should not
5536         // generate a bounds check if it is a valid range.
5537       }
5538       return;
5539     }
5540 
5541     // The constant length has to be the immediate operand of the compare, so the jump
5541     // condition is reversed: index >= length is out of bounds.
5542     CpuRegister index_reg = index_loc.AsRegister<CpuRegister>();
5543     __ cmpl(index_reg, Immediate(length));
5544     codegen_->AddSlowPath(slow_path);
5545     __ j(kAboveEqual, slow_path->GetEntryLabel());
5546   } else {
5547     HInstruction* array_length = instruction->InputAt(1);
5548     if (array_length->IsEmittedAtUseSite()) {
5549       // Address the length field in the array.
5550       DCHECK(array_length->IsArrayLength());
5551       uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
5552       Location array_loc = array_length->GetLocations()->InAt(0);
5553       Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
5554       if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
5555         // TODO: if index_loc.IsConstant(), compare twice the index (to compensate for
5556         // the string compression flag) with the in-memory length and avoid the temporary.
5557         CpuRegister length_reg = CpuRegister(TMP);
5558         __ movl(length_reg, array_len);
5559         codegen_->MaybeRecordImplicitNullCheck(array_length);
5560         __ shrl(length_reg, Immediate(1));
5561         codegen_->GenerateIntCompare(length_reg, index_loc);
5562       } else {
5563         // Check the bound for the general case:
5564         // an array of char, or a String's char array when the compression feature is off.
5565         if (index_loc.IsConstant()) {
5566           int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
5567           __ cmpl(array_len, Immediate(value));
5568         } else {
5569           __ cmpl(array_len, index_loc.AsRegister<CpuRegister>());
5570         }
5571         codegen_->MaybeRecordImplicitNullCheck(array_length);
5572       }
5573     } else {
5574       codegen_->GenerateIntCompare(length_loc, index_loc);
5575     }
5576     codegen_->AddSlowPath(slow_path);
5577     __ j(kBelowEqual, slow_path->GetEntryLabel());
5578   }
5579 }
5580 
5581 void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp,
5582                                      CpuRegister card,
5583                                      CpuRegister object,
5584                                      CpuRegister value,
5585                                      bool value_can_be_null) {
5586   NearLabel is_null;
5587   if (value_can_be_null) {
5588     __ testl(value, value);
5589     __ j(kEqual, &is_null);
5590   }
5591   // Load the address of the card table into `card`.
5592   __ gs()->movq(card, Address::Absolute(Thread::CardTableOffset<kX86_64PointerSize>().Int32Value(),
5593                                         /* no_rip= */ true));
5594   // Calculate the offset (in the card table) of the card corresponding to
5595   // `object`.
5596   __ movq(temp, object);
5597   __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift));
5598   // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
5599   // `object`'s card.
5600   //
5601   // Register `card` contains the address of the card table. Note that the card
5602   // table's base is biased during its creation so that it always starts at an
5603   // address whose least-significant byte is equal to `kCardDirty` (see
5604   // art::gc::accounting::CardTable::Create). Therefore the MOVB instruction
5605   // below writes the `kCardDirty` (byte) value into the `object`'s card
5606   // (located at `card + object >> kCardShift`).
5607   //
5608   // This dual use of the value in register `card` (1. to calculate the location
5609   // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
5610   // (no need to explicitly load `kCardDirty` as an immediate value).
5611   __ movb(Address(temp, card, TIMES_1, 0), card);
5612   if (value_can_be_null) {
5613     __ Bind(&is_null);
5614   }
5615 }
5616 
5617 void LocationsBuilderX86_64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
5618   LOG(FATAL) << "Unimplemented";
5619 }
5620 
5621 void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instruction) {
5622   if (instruction->GetNext()->IsSuspendCheck() &&
5623       instruction->GetBlock()->GetLoopInformation() != nullptr) {
5624     HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
5625     // The back edge will generate the suspend check.
5626     codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
5627   }
5628 
5629   codegen_->GetMoveResolver()->EmitNativeCode(instruction);
5630 }
5631 
5632 void LocationsBuilderX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
5633   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5634       instruction, LocationSummary::kCallOnSlowPath);
5635   // In suspend check slow path, usually there are no caller-save registers at all.
5636   // If SIMD instructions are present, however, we force spilling all live SIMD
5637   // registers in full width (since the runtime only saves/restores lower part).
5638   locations->SetCustomSlowPathCallerSaves(
5639       GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
5640 }
5641 
5642 void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
5643   HBasicBlock* block = instruction->GetBlock();
5644   if (block->GetLoopInformation() != nullptr) {
5645     DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
5646     // The back edge will generate the suspend check.
5647     return;
5648   }
5649   if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
5650     // The goto will generate the suspend check.
5651     return;
5652   }
5653   GenerateSuspendCheck(instruction, nullptr);
5654 }
5655 
5656 void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruction,
5657                                                           HBasicBlock* successor) {
5658   SuspendCheckSlowPathX86_64* slow_path =
5659       down_cast<SuspendCheckSlowPathX86_64*>(instruction->GetSlowPath());
5660   if (slow_path == nullptr) {
5661     slow_path =
5662         new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathX86_64(instruction, successor);
5663     instruction->SetSlowPath(slow_path);
5664     codegen_->AddSlowPath(slow_path);
5665     if (successor != nullptr) {
5666       DCHECK(successor->IsLoopHeader());
5667     }
5668   } else {
5669     DCHECK_EQ(slow_path->GetSuccessor(), successor);
5670   }
5671 
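  // Check the 16-bit thread flags: a non-zero value means a flag (e.g. a suspend or checkpoint
  // request) is pending. With no successor, branch to the slow path only when a flag is set;
  // for a back edge, branch to the successor when no flag is set and fall into the slow path
  // otherwise.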
5672   __ gs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64PointerSize>().Int32Value(),
5673                                   /* no_rip= */ true),
5674                 Immediate(0));
5675   if (successor == nullptr) {
5676     __ j(kNotEqual, slow_path->GetEntryLabel());
5677     __ Bind(slow_path->GetReturnLabel());
5678   } else {
5679     __ j(kEqual, codegen_->GetLabelOf(successor));
5680     __ jmp(slow_path->GetEntryLabel());
5681   }
5682 }
5683 
5684 X86_64Assembler* ParallelMoveResolverX86_64::GetAssembler() const {
5685   return codegen_->GetAssembler();
5686 }
5687 
5688 void ParallelMoveResolverX86_64::EmitMove(size_t index) {
5689   MoveOperands* move = moves_[index];
5690   Location source = move->GetSource();
5691   Location destination = move->GetDestination();
5692 
5693   if (source.IsRegister()) {
5694     if (destination.IsRegister()) {
5695       __ movq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
5696     } else if (destination.IsStackSlot()) {
5697       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
5698               source.AsRegister<CpuRegister>());
5699     } else {
5700       DCHECK(destination.IsDoubleStackSlot());
5701       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
5702               source.AsRegister<CpuRegister>());
5703     }
5704   } else if (source.IsStackSlot()) {
5705     if (destination.IsRegister()) {
5706       __ movl(destination.AsRegister<CpuRegister>(),
5707               Address(CpuRegister(RSP), source.GetStackIndex()));
5708     } else if (destination.IsFpuRegister()) {
5709       __ movss(destination.AsFpuRegister<XmmRegister>(),
5710               Address(CpuRegister(RSP), source.GetStackIndex()));
5711     } else {
5712       DCHECK(destination.IsStackSlot());
5713       __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
5714       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
5715     }
5716   } else if (source.IsDoubleStackSlot()) {
5717     if (destination.IsRegister()) {
5718       __ movq(destination.AsRegister<CpuRegister>(),
5719               Address(CpuRegister(RSP), source.GetStackIndex()));
5720     } else if (destination.IsFpuRegister()) {
5721       __ movsd(destination.AsFpuRegister<XmmRegister>(),
5722                Address(CpuRegister(RSP), source.GetStackIndex()));
5723     } else {
5724       DCHECK(destination.IsDoubleStackSlot()) << destination;
5725       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
5726       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
5727     }
5728   } else if (source.IsSIMDStackSlot()) {
5729     if (destination.IsFpuRegister()) {
5730       __ movups(destination.AsFpuRegister<XmmRegister>(),
5731                 Address(CpuRegister(RSP), source.GetStackIndex()));
5732     } else {
5733       DCHECK(destination.IsSIMDStackSlot());
5734       size_t high = kX86_64WordSize;
5735       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
5736       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
5737       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex() + high));
5738       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex() + high), CpuRegister(TMP));
5739     }
5740   } else if (source.IsConstant()) {
5741     HConstant* constant = source.GetConstant();
5742     if (constant->IsIntConstant() || constant->IsNullConstant()) {
5743       int32_t value = CodeGenerator::GetInt32ValueOf(constant);
5744       if (destination.IsRegister()) {
5745         if (value == 0) {
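          // XOR of a register with itself has a shorter encoding than moving a zero immediate
          // and clears the full 64-bit register.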
5746           __ xorl(destination.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
5747         } else {
5748           __ movl(destination.AsRegister<CpuRegister>(), Immediate(value));
5749         }
5750       } else {
5751         DCHECK(destination.IsStackSlot()) << destination;
5752         __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
5753       }
5754     } else if (constant->IsLongConstant()) {
5755       int64_t value = constant->AsLongConstant()->GetValue();
5756       if (destination.IsRegister()) {
5757         codegen_->Load64BitValue(destination.AsRegister<CpuRegister>(), value);
5758       } else {
5759         DCHECK(destination.IsDoubleStackSlot()) << destination;
5760         codegen_->Store64BitValueToStack(destination, value);
5761       }
5762     } else if (constant->IsFloatConstant()) {
5763       float fp_value = constant->AsFloatConstant()->GetValue();
5764       if (destination.IsFpuRegister()) {
5765         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
5766         codegen_->Load32BitValue(dest, fp_value);
5767       } else {
5768         DCHECK(destination.IsStackSlot()) << destination;
5769         Immediate imm(bit_cast<int32_t, float>(fp_value));
5770         __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
5771       }
5772     } else {
5773       DCHECK(constant->IsDoubleConstant()) << constant->DebugName();
5774       double fp_value = constant->AsDoubleConstant()->GetValue();
5775       int64_t value = bit_cast<int64_t, double>(fp_value);
5776       if (destination.IsFpuRegister()) {
5777         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
5778         codegen_->Load64BitValue(dest, fp_value);
5779       } else {
5780         DCHECK(destination.IsDoubleStackSlot()) << destination;
5781         codegen_->Store64BitValueToStack(destination, value);
5782       }
5783     }
5784   } else if (source.IsFpuRegister()) {
5785     if (destination.IsFpuRegister()) {
5786       __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
5787     } else if (destination.IsStackSlot()) {
5788       __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
5789                source.AsFpuRegister<XmmRegister>());
5790     } else if (destination.IsDoubleStackSlot()) {
5791       __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
5792                source.AsFpuRegister<XmmRegister>());
5793     } else {
5794        DCHECK(destination.IsSIMDStackSlot());
5795       __ movups(Address(CpuRegister(RSP), destination.GetStackIndex()),
5796                 source.AsFpuRegister<XmmRegister>());
5797     }
5798   }
5799 }
5800 
5801 void ParallelMoveResolverX86_64::Exchange32(CpuRegister reg, int mem) {
5802   __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5803   __ movl(Address(CpuRegister(RSP), mem), reg);
5804   __ movl(reg, CpuRegister(TMP));
5805 }
5806 
5807 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg1, CpuRegister reg2) {
5808   __ movq(CpuRegister(TMP), reg1);
5809   __ movq(reg1, reg2);
5810   __ movq(reg2, CpuRegister(TMP));
5811 }
5812 
5813 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) {
5814   __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5815   __ movq(Address(CpuRegister(RSP), mem), reg);
5816   __ movq(reg, CpuRegister(TMP));
5817 }
5818 
5819 void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) {
5820   __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5821   __ movss(Address(CpuRegister(RSP), mem), reg);
5822   __ movd(reg, CpuRegister(TMP));
5823 }
5824 
5825 void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) {
5826   __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5827   __ movsd(Address(CpuRegister(RSP), mem), reg);
5828   __ movd(reg, CpuRegister(TMP));
5829 }
5830 
5831 void ParallelMoveResolverX86_64::Exchange128(XmmRegister reg, int mem) {
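  // Swap a 128-bit XMM register with a 16-byte stack slot: spill the register to freshly
  // reserved stack space, swap that copy with the original slot (whose offset has shifted by
  // `extra_slot` because RSP moved), then reload the register from the scratch area.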
5832   size_t extra_slot = 2 * kX86_64WordSize;
5833   __ subq(CpuRegister(RSP), Immediate(extra_slot));
5834   __ movups(Address(CpuRegister(RSP), 0), XmmRegister(reg));
5835   ExchangeMemory64(0, mem + extra_slot, 2);
5836   __ movups(XmmRegister(reg), Address(CpuRegister(RSP), 0));
5837   __ addq(CpuRegister(RSP), Immediate(extra_slot));
5838 }
5839 
5840 void ParallelMoveResolverX86_64::ExchangeMemory32(int mem1, int mem2) {
5841   ScratchRegisterScope ensure_scratch(
5842       this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
5843 
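  // If the scratch register had to be spilled (pushed) to become available, RSP has moved by
  // one word, so the incoming stack offsets must be adjusted accordingly.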
5844   int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
5845   __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
5846   __ movl(CpuRegister(ensure_scratch.GetRegister()),
5847           Address(CpuRegister(RSP), mem2 + stack_offset));
5848   __ movl(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
5849   __ movl(Address(CpuRegister(RSP), mem1 + stack_offset),
5850           CpuRegister(ensure_scratch.GetRegister()));
5851 }
5852 
5853 void ParallelMoveResolverX86_64::ExchangeMemory64(int mem1, int mem2, int num_of_qwords) {
5854   ScratchRegisterScope ensure_scratch(
5855       this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
5856 
5857   int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
5858 
5859   // Now that temp registers are available (possibly spilled), exchange blocks of memory.
5860   for (int i = 0; i < num_of_qwords; i++) {
5861     __ movq(CpuRegister(TMP),
5862             Address(CpuRegister(RSP), mem1 + stack_offset));
5863     __ movq(CpuRegister(ensure_scratch.GetRegister()),
5864             Address(CpuRegister(RSP), mem2 + stack_offset));
5865     __ movq(Address(CpuRegister(RSP), mem2 + stack_offset),
5866             CpuRegister(TMP));
5867     __ movq(Address(CpuRegister(RSP), mem1 + stack_offset),
5868             CpuRegister(ensure_scratch.GetRegister()));
5869     stack_offset += kX86_64WordSize;
5870   }
5871 }
5872 
5873 void ParallelMoveResolverX86_64::EmitSwap(size_t index) {
5874   MoveOperands* move = moves_[index];
5875   Location source = move->GetSource();
5876   Location destination = move->GetDestination();
5877 
5878   if (source.IsRegister() && destination.IsRegister()) {
5879     Exchange64(source.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
5880   } else if (source.IsRegister() && destination.IsStackSlot()) {
5881     Exchange32(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
5882   } else if (source.IsStackSlot() && destination.IsRegister()) {
5883     Exchange32(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
5884   } else if (source.IsStackSlot() && destination.IsStackSlot()) {
5885     ExchangeMemory32(destination.GetStackIndex(), source.GetStackIndex());
5886   } else if (source.IsRegister() && destination.IsDoubleStackSlot()) {
5887     Exchange64(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
5888   } else if (source.IsDoubleStackSlot() && destination.IsRegister()) {
5889     Exchange64(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
5890   } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
5891     ExchangeMemory64(destination.GetStackIndex(), source.GetStackIndex(), 1);
5892   } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
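    // Swap two XMM registers by routing one of them through the core scratch register TMP.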
5893     __ movd(CpuRegister(TMP), source.AsFpuRegister<XmmRegister>());
5894     __ movaps(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>());
5895     __ movd(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP));
5896   } else if (source.IsFpuRegister() && destination.IsStackSlot()) {
5897     Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
5898   } else if (source.IsStackSlot() && destination.IsFpuRegister()) {
5899     Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
5900   } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) {
5901     Exchange64(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
5902   } else if (source.IsDoubleStackSlot() && destination.IsFpuRegister()) {
5903     Exchange64(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
5904   } else if (source.IsSIMDStackSlot() && destination.IsSIMDStackSlot()) {
5905     ExchangeMemory64(destination.GetStackIndex(), source.GetStackIndex(), 2);
5906   } else if (source.IsFpuRegister() && destination.IsSIMDStackSlot()) {
5907     Exchange128(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
5908   } else if (destination.IsFpuRegister() && source.IsSIMDStackSlot()) {
5909     Exchange128(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
5910   } else {
5911     LOG(FATAL) << "Unimplemented swap between " << source << " and " << destination;
5912   }
5913 }
5914 
5915 
5916 void ParallelMoveResolverX86_64::SpillScratch(int reg) {
5917   __ pushq(CpuRegister(reg));
5918 }
5919 
5920 
5921 void ParallelMoveResolverX86_64::RestoreScratch(int reg) {
5922   __ popq(CpuRegister(reg));
5923 }
5924 
5925 void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck(
5926     SlowPathCode* slow_path, CpuRegister class_reg) {
5927   constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
5928   const size_t status_byte_offset =
5929       mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
5930   constexpr uint32_t shifted_visibly_initialized_value =
5931       enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte);
5932 
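  // The class status lives in the bits above the SubtypeCheck bitstring, so it can be read with
  // a single byte compare; any status below kVisiblyInitialized sends us to the slow path.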
5933   __ cmpb(Address(class_reg, status_byte_offset), Immediate(shifted_visibly_initialized_value));
5934   __ j(kBelow, slow_path->GetEntryLabel());
5935   __ Bind(slow_path->GetExitLabel());
5936 }
5937 
5938 void InstructionCodeGeneratorX86_64::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
5939                                                                        CpuRegister temp) {
5940   uint32_t path_to_root = check->GetBitstringPathToRoot();
5941   uint32_t mask = check->GetBitstringMask();
5942   DCHECK(IsPowerOfTwo(mask + 1));
5943   size_t mask_bits = WhichPowerOf2(mask + 1);
5944 
5945   if (mask_bits == 16u) {
5946     // Compare the bitstring in memory.
5947     __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root));
5948   } else {
5949     // /* uint32_t */ temp = temp->status_
5950     __ movl(temp, Address(temp, mirror::Class::StatusOffset()));
5951     // Compare the bitstring bits using SUB.
5952     __ subl(temp, Immediate(path_to_root));
5953     // Shift out bits that do not contribute to the comparison.
5954     __ shll(temp, Immediate(32u - mask_bits));
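    // Only the flags are needed: after the subtraction and the shift that discards the bits
    // above the mask, the zero flag is set exactly when the masked bits equal `path_to_root`.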
5955   }
5956 }
5957 
5958 HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind(
5959     HLoadClass::LoadKind desired_class_load_kind) {
5960   switch (desired_class_load_kind) {
5961     case HLoadClass::LoadKind::kInvalid:
5962       LOG(FATAL) << "UNREACHABLE";
5963       UNREACHABLE();
5964     case HLoadClass::LoadKind::kReferrersClass:
5965       break;
5966     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
5967     case HLoadClass::LoadKind::kBootImageRelRo:
5968     case HLoadClass::LoadKind::kBssEntry:
5969       DCHECK(!Runtime::Current()->UseJitCompilation());
5970       break;
5971     case HLoadClass::LoadKind::kJitBootImageAddress:
5972     case HLoadClass::LoadKind::kJitTableAddress:
5973       DCHECK(Runtime::Current()->UseJitCompilation());
5974       break;
5975     case HLoadClass::LoadKind::kRuntimeCall:
5976       break;
5977   }
5978   return desired_class_load_kind;
5979 }
5980 
5981 void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
5982   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
5983   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
5984     // Custom calling convention: RAX serves as both input and output.
5985     CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
5986         cls,
5987         Location::RegisterLocation(RAX),
5988         Location::RegisterLocation(RAX));
5989     return;
5990   }
5991   DCHECK(!cls->NeedsAccessCheck());
5992 
5993   const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
5994   LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
5995       ? LocationSummary::kCallOnSlowPath
5996       : LocationSummary::kNoCall;
5997   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind);
5998   if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
5999     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
6000   }
6001 
6002   if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
6003     locations->SetInAt(0, Location::RequiresRegister());
6004   }
6005   locations->SetOut(Location::RequiresRegister());
6006   if (load_kind == HLoadClass::LoadKind::kBssEntry) {
6007     if (!kUseReadBarrier || kUseBakerReadBarrier) {
6008       // Rely on the type resolution and/or initialization to save everything.
6009       locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6010     } else {
6011       // For non-Baker read barrier we have a temp-clobbering call.
6012     }
6013   }
6014 }
6015 
6016 Label* CodeGeneratorX86_64::NewJitRootClassPatch(const DexFile& dex_file,
6017                                                  dex::TypeIndex type_index,
6018                                                  Handle<mirror::Class> handle) {
6019   ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
6020   // Add a patch entry and return the label.
6021   jit_class_patches_.emplace_back(&dex_file, type_index.index_);
6022   PatchInfo<Label>* info = &jit_class_patches_.back();
6023   return &info->label;
6024 }
6025 
6026 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
6027 // move.
6028 void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
6029   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
6030   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
6031     codegen_->GenerateLoadClassRuntimeCall(cls);
6032     return;
6033   }
6034   DCHECK(!cls->NeedsAccessCheck());
6035 
6036   LocationSummary* locations = cls->GetLocations();
6037   Location out_loc = locations->Out();
6038   CpuRegister out = out_loc.AsRegister<CpuRegister>();
6039 
6040   const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
6041       ? kWithoutReadBarrier
6042       : kCompilerReadBarrierOption;
6043   bool generate_null_check = false;
6044   switch (load_kind) {
6045     case HLoadClass::LoadKind::kReferrersClass: {
6046       DCHECK(!cls->CanCallRuntime());
6047       DCHECK(!cls->MustGenerateClinitCheck());
6048       // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
6049       CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
6050       GenerateGcRootFieldLoad(
6051           cls,
6052           out_loc,
6053           Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()),
6054           /* fixup_label= */ nullptr,
6055           read_barrier_option);
6056       break;
6057     }
6058     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
6059       DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
6060              codegen_->GetCompilerOptions().IsBootImageExtension());
6061       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
6062       __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false));
6063       codegen_->RecordBootImageTypePatch(cls);
6064       break;
6065     case HLoadClass::LoadKind::kBootImageRelRo: {
6066       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
6067       __ movl(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false));
6068       codegen_->RecordBootImageRelRoPatch(codegen_->GetBootImageOffset(cls));
6069       break;
6070     }
6071     case HLoadClass::LoadKind::kBssEntry: {
6072       Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
6073                                           /* no_rip= */ false);
6074       Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls);
6075       // /* GcRoot<mirror::Class> */ out = *address  /* PC-relative */
6076       GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
6077       // No need for memory fence, thanks to the x86-64 memory model.
6078       generate_null_check = true;
6079       break;
6080     }
6081     case HLoadClass::LoadKind::kJitBootImageAddress: {
6082       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
6083       uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get());
6084       DCHECK_NE(address, 0u);
6085       __ movl(out, Immediate(static_cast<int32_t>(address)));  // Zero-extended.
6086       break;
6087     }
6088     case HLoadClass::LoadKind::kJitTableAddress: {
6089       Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
6090                                           /* no_rip= */ true);
6091       Label* fixup_label =
6092           codegen_->NewJitRootClassPatch(cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass());
6093       // /* GcRoot<mirror::Class> */ out = *address
6094       GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
6095       break;
6096     }
6097     default:
6098       LOG(FATAL) << "Unexpected load kind: " << cls->GetLoadKind();
6099       UNREACHABLE();
6100   }
6101 
6102   if (generate_null_check || cls->MustGenerateClinitCheck()) {
6103     DCHECK(cls->CanCallRuntime());
6104     SlowPathCode* slow_path =
6105         new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64(cls, cls);
6106     codegen_->AddSlowPath(slow_path);
6107     if (generate_null_check) {
6108       __ testl(out, out);
6109       __ j(kEqual, slow_path->GetEntryLabel());
6110     }
6111     if (cls->MustGenerateClinitCheck()) {
6112       GenerateClassInitializationCheck(slow_path, out);
6113     } else {
6114       __ Bind(slow_path->GetExitLabel());
6115     }
6116   }
6117 }
6118 
6119 void LocationsBuilderX86_64::VisitClinitCheck(HClinitCheck* check) {
6120   LocationSummary* locations =
6121       new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
6122   locations->SetInAt(0, Location::RequiresRegister());
6123   if (check->HasUses()) {
6124     locations->SetOut(Location::SameAsFirstInput());
6125   }
6126   // Rely on the type initialization to save everything we need.
6127   locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6128 }
6129 
6130 void LocationsBuilderX86_64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
6131   // Custom calling convention: RAX serves as both input and output.
6132   Location location = Location::RegisterLocation(RAX);
6133   CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location);
6134 }
6135 
6136 void InstructionCodeGeneratorX86_64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
6137   codegen_->GenerateLoadMethodHandleRuntimeCall(load);
6138 }
6139 
6140 void LocationsBuilderX86_64::VisitLoadMethodType(HLoadMethodType* load) {
6141   // Custom calling convention: RAX serves as both input and output.
6142   Location location = Location::RegisterLocation(RAX);
6143   CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location);
6144 }
6145 
6146 void InstructionCodeGeneratorX86_64::VisitLoadMethodType(HLoadMethodType* load) {
6147   codegen_->GenerateLoadMethodTypeRuntimeCall(load);
6148 }
6149 
6150 void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) {
6151   // We assume the class is not null.
6152   SlowPathCode* slow_path =
6153       new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64(check->GetLoadClass(), check);
6154   codegen_->AddSlowPath(slow_path);
6155   GenerateClassInitializationCheck(slow_path,
6156                                    check->GetLocations()->InAt(0).AsRegister<CpuRegister>());
6157 }
6158 
6159 HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind(
6160     HLoadString::LoadKind desired_string_load_kind) {
6161   switch (desired_string_load_kind) {
6162     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
6163     case HLoadString::LoadKind::kBootImageRelRo:
6164     case HLoadString::LoadKind::kBssEntry:
6165       DCHECK(!Runtime::Current()->UseJitCompilation());
6166       break;
6167     case HLoadString::LoadKind::kJitBootImageAddress:
6168     case HLoadString::LoadKind::kJitTableAddress:
6169       DCHECK(Runtime::Current()->UseJitCompilation());
6170       break;
6171     case HLoadString::LoadKind::kRuntimeCall:
6172       break;
6173   }
6174   return desired_string_load_kind;
6175 }
6176 
6177 void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) {
6178   LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
6179   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
6180   if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) {
6181     locations->SetOut(Location::RegisterLocation(RAX));
6182   } else {
6183     locations->SetOut(Location::RequiresRegister());
6184     if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
6185       if (!kUseReadBarrier || kUseBakerReadBarrier) {
6186         // Rely on the pResolveString to save everything.
6187         locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6188       } else {
6189         // For non-Baker read barrier we have a temp-clobbering call.
6190       }
6191     }
6192   }
6193 }
6194 
6195 Label* CodeGeneratorX86_64::NewJitRootStringPatch(const DexFile& dex_file,
6196                                                   dex::StringIndex string_index,
6197                                                   Handle<mirror::String> handle) {
6198   ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
6199   // Add a patch entry and return the label.
6200   jit_string_patches_.emplace_back(&dex_file, string_index.index_);
6201   PatchInfo<Label>* info = &jit_string_patches_.back();
6202   return &info->label;
6203 }
6204 
6205 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
6206 // move.
6207 void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
6208   LocationSummary* locations = load->GetLocations();
6209   Location out_loc = locations->Out();
6210   CpuRegister out = out_loc.AsRegister<CpuRegister>();
6211 
6212   switch (load->GetLoadKind()) {
6213     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
6214       DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
6215              codegen_->GetCompilerOptions().IsBootImageExtension());
6216       __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false));
6217       codegen_->RecordBootImageStringPatch(load);
6218       return;
6219     }
6220     case HLoadString::LoadKind::kBootImageRelRo: {
6221       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
6222       __ movl(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false));
6223       codegen_->RecordBootImageRelRoPatch(codegen_->GetBootImageOffset(load));
6224       return;
6225     }
6226     case HLoadString::LoadKind::kBssEntry: {
6227       Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
6228                                           /* no_rip= */ false);
6229       Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
6230       // /* GcRoot<mirror::String> */ out = *address  /* PC-relative */
6231       GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
6232       // No need for memory fence, thanks to the x86-64 memory model.
6233       SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86_64(load);
6234       codegen_->AddSlowPath(slow_path);
6235       __ testl(out, out);
6236       __ j(kEqual, slow_path->GetEntryLabel());
6237       __ Bind(slow_path->GetExitLabel());
6238       return;
6239     }
6240     case HLoadString::LoadKind::kJitBootImageAddress: {
6241       uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get());
6242       DCHECK_NE(address, 0u);
6243       __ movl(out, Immediate(static_cast<int32_t>(address)));  // Zero-extended.
6244       return;
6245     }
6246     case HLoadString::LoadKind::kJitTableAddress: {
6247       Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
6248                                           /* no_rip= */ true);
6249       Label* fixup_label = codegen_->NewJitRootStringPatch(
6250           load->GetDexFile(), load->GetStringIndex(), load->GetString());
6251       // /* GcRoot<mirror::String> */ out = *address
6252       GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
6253       return;
6254     }
6255     default:
6256       break;
6257   }
6258 
6259   // TODO: Re-add the compiler code to do string dex cache lookup again.
6260   // Custom calling convention: RAX serves as both input and output.
6261   __ movl(CpuRegister(RAX), Immediate(load->GetStringIndex().index_));
6262   codegen_->InvokeRuntime(kQuickResolveString,
6263                           load,
6264                           load->GetDexPc());
6265   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
6266 }
6267 
6268 static Address GetExceptionTlsAddress() {
6269   return Address::Absolute(Thread::ExceptionOffset<kX86_64PointerSize>().Int32Value(),
6270                            /* no_rip= */ true);
6271 }
6272 
6273 void LocationsBuilderX86_64::VisitLoadException(HLoadException* load) {
6274   LocationSummary* locations =
6275       new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall);
6276   locations->SetOut(Location::RequiresRegister());
6277 }
6278 
6279 void InstructionCodeGeneratorX86_64::VisitLoadException(HLoadException* load) {
6280   __ gs()->movl(load->GetLocations()->Out().AsRegister<CpuRegister>(), GetExceptionTlsAddress());
6281 }
6282 
6283 void LocationsBuilderX86_64::VisitClearException(HClearException* clear) {
6284   new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
6285 }
6286 
6287 void InstructionCodeGeneratorX86_64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
6288   __ gs()->movl(GetExceptionTlsAddress(), Immediate(0));
6289 }
6290 
6291 void LocationsBuilderX86_64::VisitThrow(HThrow* instruction) {
6292   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6293       instruction, LocationSummary::kCallOnMainOnly);
6294   InvokeRuntimeCallingConvention calling_convention;
6295   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6296 }
6297 
6298 void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) {
6299   codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
6300   CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
6301 }
6302 
6303 // Temp is used for read barrier.
6304 static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
6305   if (kEmitCompilerReadBarrier &&
6306       !kUseBakerReadBarrier &&
6307       (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
6308        type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
6309        type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
6310     return 1;
6311   }
6312   return 0;
6313 }
6314 
6315 // The interface check uses two temps: one holds the number of interfaces and the other the
6316 // current interface pointer; the current interface is compared in memory.
6317 // The other checks have one temp for loading the object's class.
6318 static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
6319   if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
6320     return 2;
6321   }
6322   return 1 + NumberOfInstanceOfTemps(type_check_kind);
6323 }
6324 
6325 void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
6326   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
6327   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6328   bool baker_read_barrier_slow_path = false;
6329   switch (type_check_kind) {
6330     case TypeCheckKind::kExactCheck:
6331     case TypeCheckKind::kAbstractClassCheck:
6332     case TypeCheckKind::kClassHierarchyCheck:
6333     case TypeCheckKind::kArrayObjectCheck: {
6334       bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction);
6335       call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
6336       baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier;
6337       break;
6338     }
6339     case TypeCheckKind::kArrayCheck:
6340     case TypeCheckKind::kUnresolvedCheck:
6341     case TypeCheckKind::kInterfaceCheck:
6342       call_kind = LocationSummary::kCallOnSlowPath;
6343       break;
6344     case TypeCheckKind::kBitstringCheck:
6345       break;
6346   }
6347 
6348   LocationSummary* locations =
6349       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
6350   if (baker_read_barrier_slow_path) {
6351     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
6352   }
6353   locations->SetInAt(0, Location::RequiresRegister());
6354   if (type_check_kind == TypeCheckKind::kBitstringCheck) {
6355     locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
6356     locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
6357     locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
6358   } else {
6359     locations->SetInAt(1, Location::Any());
6360   }
6361   // Note that TypeCheckSlowPathX86_64 uses this "out" register too.
6362   locations->SetOut(Location::RequiresRegister());
6363   locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
6364 }
6365 
6366 void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
6367   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6368   LocationSummary* locations = instruction->GetLocations();
6369   Location obj_loc = locations->InAt(0);
6370   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
6371   Location cls = locations->InAt(1);
6372   Location out_loc = locations->Out();
6373   CpuRegister out = out_loc.AsRegister<CpuRegister>();
6374   const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
6375   DCHECK_LE(num_temps, 1u);
6376   Location maybe_temp_loc = (num_temps >= 1u) ? locations->GetTemp(0) : Location::NoLocation();
6377   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
6378   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
6379   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
6380   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
6381   SlowPathCode* slow_path = nullptr;
6382   NearLabel done, zero;
6383 
6384   // Return 0 if `obj` is null.
6385   // Avoid null check if we know obj is not null.
6386   if (instruction->MustDoNullCheck()) {
6387     __ testl(obj, obj);
6388     __ j(kEqual, &zero);
6389   }
6390 
6391   switch (type_check_kind) {
6392     case TypeCheckKind::kExactCheck: {
6393       ReadBarrierOption read_barrier_option =
6394           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
6395       // /* HeapReference<Class> */ out = obj->klass_
6396       GenerateReferenceLoadTwoRegisters(instruction,
6397                                         out_loc,
6398                                         obj_loc,
6399                                         class_offset,
6400                                         read_barrier_option);
6401       if (cls.IsRegister()) {
6402         __ cmpl(out, cls.AsRegister<CpuRegister>());
6403       } else {
6404         DCHECK(cls.IsStackSlot()) << cls;
6405         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
6406       }
6407       if (zero.IsLinked()) {
6408         // Classes must be equal for the instanceof to succeed.
6409         __ j(kNotEqual, &zero);
6410         __ movl(out, Immediate(1));
6411         __ jmp(&done);
6412       } else {
6413         __ setcc(kEqual, out);
6414         // setcc only sets the low byte.
6415         __ andl(out, Immediate(1));
6416       }
6417       break;
6418     }
6419 
6420     case TypeCheckKind::kAbstractClassCheck: {
6421       ReadBarrierOption read_barrier_option =
6422           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
6423       // /* HeapReference<Class> */ out = obj->klass_
6424       GenerateReferenceLoadTwoRegisters(instruction,
6425                                         out_loc,
6426                                         obj_loc,
6427                                         class_offset,
6428                                         read_barrier_option);
6429       // If the class is abstract, we eagerly fetch the super class of the
6430       // object to avoid doing a comparison we know will fail.
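      // In pseudocode, the loop below amounts to:
      //   do {
      //     out = out->super_class_;                    // With read barrier if needed.
      //     if (out == null) { result = 0; goto done; }
      //   } while (out != cls);
      //   result = 1;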
6431       NearLabel loop, success;
6432       __ Bind(&loop);
6433       // /* HeapReference<Class> */ out = out->super_class_
6434       GenerateReferenceLoadOneRegister(instruction,
6435                                        out_loc,
6436                                        super_offset,
6437                                        maybe_temp_loc,
6438                                        read_barrier_option);
6439       __ testl(out, out);
6440       // If `out` is null, we use it for the result, and jump to `done`.
6441       __ j(kEqual, &done);
6442       if (cls.IsRegister()) {
6443         __ cmpl(out, cls.AsRegister<CpuRegister>());
6444       } else {
6445         DCHECK(cls.IsStackSlot()) << cls;
6446         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
6447       }
6448       __ j(kNotEqual, &loop);
6449       __ movl(out, Immediate(1));
6450       if (zero.IsLinked()) {
6451         __ jmp(&done);
6452       }
6453       break;
6454     }
6455 
6456     case TypeCheckKind::kClassHierarchyCheck: {
6457       ReadBarrierOption read_barrier_option =
6458           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
6459       // /* HeapReference<Class> */ out = obj->klass_
6460       GenerateReferenceLoadTwoRegisters(instruction,
6461                                         out_loc,
6462                                         obj_loc,
6463                                         class_offset,
6464                                         read_barrier_option);
6465       // Walk over the class hierarchy to find a match.
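      // In pseudocode, the loop below amounts to:
      //   while (out != cls) {
      //     out = out->super_class_;                    // With read barrier if needed.
      //     if (out == null) { result = 0; goto done; }
      //   }
      //   result = 1;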
6466       NearLabel loop, success;
6467       __ Bind(&loop);
6468       if (cls.IsRegister()) {
6469         __ cmpl(out, cls.AsRegister<CpuRegister>());
6470       } else {
6471         DCHECK(cls.IsStackSlot()) << cls;
6472         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
6473       }
6474       __ j(kEqual, &success);
6475       // /* HeapReference<Class> */ out = out->super_class_
6476       GenerateReferenceLoadOneRegister(instruction,
6477                                        out_loc,
6478                                        super_offset,
6479                                        maybe_temp_loc,
6480                                        read_barrier_option);
6481       __ testl(out, out);
6482       __ j(kNotEqual, &loop);
6483       // If `out` is null, we use it for the result, and jump to `done`.
6484       __ jmp(&done);
6485       __ Bind(&success);
6486       __ movl(out, Immediate(1));
6487       if (zero.IsLinked()) {
6488         __ jmp(&done);
6489       }
6490       break;
6491     }
6492 
6493     case TypeCheckKind::kArrayObjectCheck: {
6494       ReadBarrierOption read_barrier_option =
6495           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
6496       // /* HeapReference<Class> */ out = obj->klass_
6497       GenerateReferenceLoadTwoRegisters(instruction,
6498                                         out_loc,
6499                                         obj_loc,
6500                                         class_offset,
6501                                         read_barrier_option);
6502       // Do an exact check.
6503       NearLabel exact_check;
6504       if (cls.IsRegister()) {
6505         __ cmpl(out, cls.AsRegister<CpuRegister>());
6506       } else {
6507         DCHECK(cls.IsStackSlot()) << cls;
6508         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
6509       }
6510       __ j(kEqual, &exact_check);
6511       // Otherwise, we need to check that the object's class is a non-primitive array.
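      // In pseudocode, the check below amounts to:
      //   component = out->component_type_;
      //   result = (component != null && component->primitive_type_ == Primitive::kPrimNot);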
6512       // /* HeapReference<Class> */ out = out->component_type_
6513       GenerateReferenceLoadOneRegister(instruction,
6514                                        out_loc,
6515                                        component_offset,
6516                                        maybe_temp_loc,
6517                                        read_barrier_option);
6518       __ testl(out, out);
6519       // If `out` is null, we use it for the result, and jump to `done`.
6520       __ j(kEqual, &done);
6521       __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot));
6522       __ j(kNotEqual, &zero);
6523       __ Bind(&exact_check);
6524       __ movl(out, Immediate(1));
6525       __ jmp(&done);
6526       break;
6527     }
6528 
6529     case TypeCheckKind::kArrayCheck: {
6530       // No read barrier since the slow path will retry upon failure.
6531       // /* HeapReference<Class> */ out = obj->klass_
6532       GenerateReferenceLoadTwoRegisters(instruction,
6533                                         out_loc,
6534                                         obj_loc,
6535                                         class_offset,
6536                                         kWithoutReadBarrier);
6537       if (cls.IsRegister()) {
6538         __ cmpl(out, cls.AsRegister<CpuRegister>());
6539       } else {
6540         DCHECK(cls.IsStackSlot()) << cls;
6541         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
6542       }
6543       DCHECK(locations->OnlyCallsOnSlowPath());
6544       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
6545           instruction, /* is_fatal= */ false);
6546       codegen_->AddSlowPath(slow_path);
6547       __ j(kNotEqual, slow_path->GetEntryLabel());
6548       __ movl(out, Immediate(1));
6549       if (zero.IsLinked()) {
6550         __ jmp(&done);
6551       }
6552       break;
6553     }
6554 
6555     case TypeCheckKind::kUnresolvedCheck:
6556     case TypeCheckKind::kInterfaceCheck: {
6557       // Note that we indeed only call on slow path, but we always go
6558       // into the slow path for the unresolved and interface check
6559       // cases.
6560       //
6561       // We cannot directly call the InstanceofNonTrivial runtime
6562       // entry point without resorting to a type checking slow path
6563       // here (i.e. by calling InvokeRuntime directly), as it would
6564       // require to assign fixed registers for the inputs of this
6565       // HInstanceOf instruction (following the runtime calling
6566       // convention), which might be cluttered by the potential first
6567       // read barrier emission at the beginning of this method.
6568       //
6569       // TODO: Introduce a new runtime entry point taking the object
6570       // to test (instead of its class) as argument, and let it deal
6571       // with the read barrier issues. This will let us refactor this
6572       // case of the `switch` code as it was previously (with a direct
6573       // call to the runtime not using a type checking slow path).
6574       // This should also be beneficial for the other cases above.
6575       DCHECK(locations->OnlyCallsOnSlowPath());
6576       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
6577           instruction, /* is_fatal= */ false);
6578       codegen_->AddSlowPath(slow_path);
6579       __ jmp(slow_path->GetEntryLabel());
6580       if (zero.IsLinked()) {
6581         __ jmp(&done);
6582       }
6583       break;
6584     }
6585 
6586     case TypeCheckKind::kBitstringCheck: {
6587       // /* HeapReference<Class> */ temp = obj->klass_
6588       GenerateReferenceLoadTwoRegisters(instruction,
6589                                         out_loc,
6590                                         obj_loc,
6591                                         class_offset,
6592                                         kWithoutReadBarrier);
6593 
6594       GenerateBitstringTypeCheckCompare(instruction, out);
6595       if (zero.IsLinked()) {
6596         __ j(kNotEqual, &zero);
6597         __ movl(out, Immediate(1));
6598         __ jmp(&done);
6599       } else {
6600         __ setcc(kEqual, out);
6601         // setcc only sets the low byte.
6602         __ andl(out, Immediate(1));
6603       }
6604       break;
6605     }
6606   }
6607 
6608   if (zero.IsLinked()) {
6609     __ Bind(&zero);
6610     __ xorl(out, out);
6611   }
6612 
6613   if (done.IsLinked()) {
6614     __ Bind(&done);
6615   }
6616 
6617   if (slow_path != nullptr) {
6618     __ Bind(slow_path->GetExitLabel());
6619   }
6620 }
6621 
6622 void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) {
6623   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6624   LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction);
6625   LocationSummary* locations =
6626       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
6627   locations->SetInAt(0, Location::RequiresRegister());
6628   if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
6629     // Require a register for the interface check since there is a loop that compares the class to
6630     // a memory address.
6631     locations->SetInAt(1, Location::RequiresRegister());
6632   } else if (type_check_kind == TypeCheckKind::kBitstringCheck) {
6633     locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
6634     locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
6635     locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
6636   } else {
6637     locations->SetInAt(1, Location::Any());
6638   }
6639   // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathX86_64.
6640   locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
6641 }
6642 
6643 void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
6644   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6645   LocationSummary* locations = instruction->GetLocations();
6646   Location obj_loc = locations->InAt(0);
6647   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
6648   Location cls = locations->InAt(1);
6649   Location temp_loc = locations->GetTemp(0);
6650   CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
6651   const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
6652   DCHECK_GE(num_temps, 1u);
6653   DCHECK_LE(num_temps, 2u);
6654   Location maybe_temp2_loc = (num_temps >= 2u) ? locations->GetTemp(1) : Location::NoLocation();
6655   const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
6656   const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
6657   const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
6658   const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
6659   const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
6660   const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
6661   const uint32_t object_array_data_offset =
6662       mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
6663 
6664   bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction);
6665   SlowPathCode* type_check_slow_path =
6666       new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
6667           instruction, is_type_check_slow_path_fatal);
6668   codegen_->AddSlowPath(type_check_slow_path);
6669 
6670 
6671   NearLabel done;
6672   // Avoid null check if we know obj is not null.
6673   if (instruction->MustDoNullCheck()) {
6674     __ testl(obj, obj);
6675     __ j(kEqual, &done);
6676   }
6677 
6678   switch (type_check_kind) {
6679     case TypeCheckKind::kExactCheck:
6680     case TypeCheckKind::kArrayCheck: {
6681       // /* HeapReference<Class> */ temp = obj->klass_
6682       GenerateReferenceLoadTwoRegisters(instruction,
6683                                         temp_loc,
6684                                         obj_loc,
6685                                         class_offset,
6686                                         kWithoutReadBarrier);
6687       if (cls.IsRegister()) {
6688         __ cmpl(temp, cls.AsRegister<CpuRegister>());
6689       } else {
6690         DCHECK(cls.IsStackSlot()) << cls;
6691         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
6692       }
6693       // Jump to slow path for throwing the exception or doing a
6694       // more involved array check.
6695       __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
6696       break;
6697     }
6698 
6699     case TypeCheckKind::kAbstractClassCheck: {
6700       // /* HeapReference<Class> */ temp = obj->klass_
6701       GenerateReferenceLoadTwoRegisters(instruction,
6702                                         temp_loc,
6703                                         obj_loc,
6704                                         class_offset,
6705                                         kWithoutReadBarrier);
6706       // If the class is abstract, we eagerly fetch the super class of the
6707       // object to avoid doing a comparison we know will fail.
6708       NearLabel loop;
6709       __ Bind(&loop);
6710       // /* HeapReference<Class> */ temp = temp->super_class_
6711       GenerateReferenceLoadOneRegister(instruction,
6712                                        temp_loc,
6713                                        super_offset,
6714                                        maybe_temp2_loc,
6715                                        kWithoutReadBarrier);
6716 
6717       // If the class reference currently in `temp` is null, jump to the slow path to throw the
6718       // exception.
6719       __ testl(temp, temp);
6720       // Otherwise, compare the classes.
6721       __ j(kZero, type_check_slow_path->GetEntryLabel());
6722       if (cls.IsRegister()) {
6723         __ cmpl(temp, cls.AsRegister<CpuRegister>());
6724       } else {
6725         DCHECK(cls.IsStackSlot()) << cls;
6726         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
6727       }
6728       __ j(kNotEqual, &loop);
6729       break;
6730     }
6731 
6732     case TypeCheckKind::kClassHierarchyCheck: {
6733       // /* HeapReference<Class> */ temp = obj->klass_
6734       GenerateReferenceLoadTwoRegisters(instruction,
6735                                         temp_loc,
6736                                         obj_loc,
6737                                         class_offset,
6738                                         kWithoutReadBarrier);
6739       // Walk over the class hierarchy to find a match.
6740       NearLabel loop;
6741       __ Bind(&loop);
6742       if (cls.IsRegister()) {
6743         __ cmpl(temp, cls.AsRegister<CpuRegister>());
6744       } else {
6745         DCHECK(cls.IsStackSlot()) << cls;
6746         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
6747       }
6748       __ j(kEqual, &done);
6749 
6750       // /* HeapReference<Class> */ temp = temp->super_class_
6751       GenerateReferenceLoadOneRegister(instruction,
6752                                        temp_loc,
6753                                        super_offset,
6754                                        maybe_temp2_loc,
6755                                        kWithoutReadBarrier);
6756 
6757       // If the class reference currently in `temp` is not null, jump
6758       // back at the beginning of the loop.
6759       __ testl(temp, temp);
6760       __ j(kNotZero, &loop);
6761       // Otherwise, jump to the slow path to throw the exception.
6762       __ jmp(type_check_slow_path->GetEntryLabel());
6763       break;
6764     }
6765 
6766     case TypeCheckKind::kArrayObjectCheck: {
6767       // /* HeapReference<Class> */ temp = obj->klass_
6768       GenerateReferenceLoadTwoRegisters(instruction,
6769                                         temp_loc,
6770                                         obj_loc,
6771                                         class_offset,
6772                                         kWithoutReadBarrier);
6773       // Do an exact check.
6774       NearLabel check_non_primitive_component_type;
6775       if (cls.IsRegister()) {
6776         __ cmpl(temp, cls.AsRegister<CpuRegister>());
6777       } else {
6778         DCHECK(cls.IsStackSlot()) << cls;
6779         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
6780       }
6781       __ j(kEqual, &done);
6782 
6783       // Otherwise, we need to check that the object's class is a non-primitive array.
6784       // /* HeapReference<Class> */ temp = temp->component_type_
6785       GenerateReferenceLoadOneRegister(instruction,
6786                                        temp_loc,
6787                                        component_offset,
6788                                        maybe_temp2_loc,
6789                                        kWithoutReadBarrier);
6790 
6791       // If the component type is not null (i.e. the object is indeed
6792       // an array), jump to label `check_non_primitive_component_type`
6793       // to further check that this component type is not a primitive
6794       // type.
6795       __ testl(temp, temp);
6796       // Otherwise, jump to the slow path to throw the exception.
6797       __ j(kZero, type_check_slow_path->GetEntryLabel());
6798       __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
6799       __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
6800       break;
6801     }
6802 
6803     case TypeCheckKind::kUnresolvedCheck: {
6804       // We always go into the type check slow path for the unresolved case.
6805       //
6806       // We cannot directly call the CheckCast runtime entry point
6807       // without resorting to a type checking slow path here (i.e. by
6808       // calling InvokeRuntime directly), as it would require to
6809       // assign fixed registers for the inputs of this HInstanceOf
6810       // instruction (following the runtime calling convention), which
6811       // might be cluttered by the potential first read barrier
6812       // emission at the beginning of this method.
6813       __ jmp(type_check_slow_path->GetEntryLabel());
6814       break;
6815     }
6816 
6817     case TypeCheckKind::kInterfaceCheck: {
6818       // Fast path for the interface check. Try to avoid read barriers to improve the fast path.
6819       // We cannot get false positives by doing this.
6820       // /* HeapReference<Class> */ temp = obj->klass_
6821       GenerateReferenceLoadTwoRegisters(instruction,
6822                                         temp_loc,
6823                                         obj_loc,
6824                                         class_offset,
6825                                         kWithoutReadBarrier);
6826 
6827       // /* HeapReference<Class> */ temp = temp->iftable_
6828       GenerateReferenceLoadTwoRegisters(instruction,
6829                                         temp_loc,
6830                                         temp_loc,
6831                                         iftable_offset,
6832                                         kWithoutReadBarrier);
6833       // Iftable is never null.
6834       __ movl(maybe_temp2_loc.AsRegister<CpuRegister>(), Address(temp, array_length_offset));
6835       // Maybe poison the `cls` for direct comparison with memory.
6836       __ MaybePoisonHeapReference(cls.AsRegister<CpuRegister>());
6837       // Loop through the iftable and check if any class matches.
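      // In pseudocode, the scan below amounts to (iftable entries come in
      // {interface class, method array} pairs, hence the step of 2):
      //   for (int32_t i = iftable->length_ - 2; i >= 0; i -= 2) {
      //     if (iftable->data_[i] == cls) goto done;    // `obj` implements the interface.
      //   }
      //   goto type_check_slow_path;                    // Throws if the cast really fails.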
6838       NearLabel start_loop;
6839       __ Bind(&start_loop);
6840       // Need to subtract first to handle the empty array case.
6841       __ subl(maybe_temp2_loc.AsRegister<CpuRegister>(), Immediate(2));
6842       __ j(kNegative, type_check_slow_path->GetEntryLabel());
6843       // Go to next interface if the classes do not match.
6844       __ cmpl(cls.AsRegister<CpuRegister>(),
6845               CodeGeneratorX86_64::ArrayAddress(temp,
6846                                                 maybe_temp2_loc,
6847                                                 TIMES_4,
6848                                                 object_array_data_offset));
6849       __ j(kNotEqual, &start_loop);  // Return if same class.
6850       // If `cls` was poisoned above, unpoison it.
6851       __ MaybeUnpoisonHeapReference(cls.AsRegister<CpuRegister>());
6852       break;
6853     }
6854 
6855     case TypeCheckKind::kBitstringCheck: {
6856       // /* HeapReference<Class> */ temp = obj->klass_
6857       GenerateReferenceLoadTwoRegisters(instruction,
6858                                         temp_loc,
6859                                         obj_loc,
6860                                         class_offset,
6861                                         kWithoutReadBarrier);
6862 
6863       GenerateBitstringTypeCheckCompare(instruction, temp);
6864       __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
6865       break;
6866     }
6867   }
6868 
6869   if (done.IsLinked()) {
6870     __ Bind(&done);
6871   }
6872 
6873   __ Bind(type_check_slow_path->GetExitLabel());
6874 }
6875 
6876 void LocationsBuilderX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
6877   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6878       instruction, LocationSummary::kCallOnMainOnly);
6879   InvokeRuntimeCallingConvention calling_convention;
6880   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6881 }
6882 
6883 void InstructionCodeGeneratorX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
6884   codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
6885                           instruction,
6886                           instruction->GetDexPc());
6887   if (instruction->IsEnter()) {
6888     CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
6889   } else {
6890     CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
6891   }
6892 }
6893 
6894 void LocationsBuilderX86_64::VisitX86AndNot(HX86AndNot* instruction) {
6895   DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
6896   DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType();
6897   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
6898   locations->SetInAt(0, Location::RequiresRegister());
6899   // There is no immediate variant of the negated bitwise AND (andn) on x86.
6900   locations->SetInAt(1, Location::RequiresRegister());
6901   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6902 }
6903 
6904 void LocationsBuilderX86_64::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) {
6905   DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
6906   DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType();
6907   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
6908   locations->SetInAt(0, Location::RequiresRegister());
6909   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6910 }
6911 
6912 void InstructionCodeGeneratorX86_64::VisitX86AndNot(HX86AndNot* instruction) {
6913   LocationSummary* locations = instruction->GetLocations();
6914   Location first = locations->InAt(0);
6915   Location second = locations->InAt(1);
6916   Location dest = locations->Out();
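  // BMI1 `andn` computes dst = ~src1 & src2; it has no immediate form, which is why both
  // inputs were constrained to registers in the locations builder above.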
6917   __ andn(dest.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
6918 }
6919 
6920 void InstructionCodeGeneratorX86_64::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) {
6921   LocationSummary* locations = instruction->GetLocations();
6922   Location src = locations->InAt(0);
6923   Location dest = locations->Out();
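  // BMI1 semantics: `blsr` computes dst = src & (src - 1) (clear the lowest set bit), while
  // `blsmsk` computes dst = src ^ (src - 1) (mask up to and including the lowest set bit).
  // E.g. for src = 0b10100: blsr yields 0b10000 and blsmsk yields 0b00111.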
6924   switch (instruction->GetOpKind()) {
6925     case HInstruction::kAnd:
6926       __ blsr(dest.AsRegister<CpuRegister>(), src.AsRegister<CpuRegister>());
6927       break;
6928     case HInstruction::kXor:
6929       __ blsmsk(dest.AsRegister<CpuRegister>(), src.AsRegister<CpuRegister>());
6930       break;
6931     default:
6932       LOG(FATAL) << "Unreachable";
6933   }
6934 }
6935 
6936 void LocationsBuilderX86_64::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
6937 void LocationsBuilderX86_64::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); }
6938 void LocationsBuilderX86_64::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); }
6939 
6940 void LocationsBuilderX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
6941   LocationSummary* locations =
6942       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
6943   DCHECK(instruction->GetResultType() == DataType::Type::kInt32
6944          || instruction->GetResultType() == DataType::Type::kInt64);
6945   locations->SetInAt(0, Location::RequiresRegister());
6946   locations->SetInAt(1, Location::Any());
6947   locations->SetOut(Location::SameAsFirstInput());
6948 }
6949 
6950 void InstructionCodeGeneratorX86_64::VisitAnd(HAnd* instruction) {
6951   HandleBitwiseOperation(instruction);
6952 }
6953 
6954 void InstructionCodeGeneratorX86_64::VisitOr(HOr* instruction) {
6955   HandleBitwiseOperation(instruction);
6956 }
6957 
6958 void InstructionCodeGeneratorX86_64::VisitXor(HXor* instruction) {
6959   HandleBitwiseOperation(instruction);
6960 }
6961 
6962 void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
6963   LocationSummary* locations = instruction->GetLocations();
6964   Location first = locations->InAt(0);
6965   Location second = locations->InAt(1);
6966   DCHECK(first.Equals(locations->Out()));
6967 
6968   if (instruction->GetResultType() == DataType::Type::kInt32) {
6969     if (second.IsRegister()) {
6970       if (instruction->IsAnd()) {
6971         __ andl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
6972       } else if (instruction->IsOr()) {
6973         __ orl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
6974       } else {
6975         DCHECK(instruction->IsXor());
6976         __ xorl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
6977       }
6978     } else if (second.IsConstant()) {
6979       Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
6980       if (instruction->IsAnd()) {
6981         __ andl(first.AsRegister<CpuRegister>(), imm);
6982       } else if (instruction->IsOr()) {
6983         __ orl(first.AsRegister<CpuRegister>(), imm);
6984       } else {
6985         DCHECK(instruction->IsXor());
6986         __ xorl(first.AsRegister<CpuRegister>(), imm);
6987       }
6988     } else {
6989       Address address(CpuRegister(RSP), second.GetStackIndex());
6990       if (instruction->IsAnd()) {
6991         __ andl(first.AsRegister<CpuRegister>(), address);
6992       } else if (instruction->IsOr()) {
6993         __ orl(first.AsRegister<CpuRegister>(), address);
6994       } else {
6995         DCHECK(instruction->IsXor());
6996         __ xorl(first.AsRegister<CpuRegister>(), address);
6997       }
6998     }
6999   } else {
7000     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
7001     CpuRegister first_reg = first.AsRegister<CpuRegister>();
7002     bool second_is_constant = false;
7003     int64_t value = 0;
7004     if (second.IsConstant()) {
7005       second_is_constant = true;
7006       value = second.GetConstant()->AsLongConstant()->GetValue();
7007     }
7008     bool is_int32_value = IsInt<32>(value);
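    // The quadword logical instructions only accept sign-extended 32-bit immediates, so a
    // value such as 0x00000000FFFFFFFF cannot be encoded inline and is instead loaded from a
    // RIP-relative 64-bit literal via LiteralInt64Address() below.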
7009 
7010     if (instruction->IsAnd()) {
7011       if (second_is_constant) {
7012         if (is_int32_value) {
7013           __ andq(first_reg, Immediate(static_cast<int32_t>(value)));
7014         } else {
7015           __ andq(first_reg, codegen_->LiteralInt64Address(value));
7016         }
7017       } else if (second.IsDoubleStackSlot()) {
7018         __ andq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
7019       } else {
7020         __ andq(first_reg, second.AsRegister<CpuRegister>());
7021       }
7022     } else if (instruction->IsOr()) {
7023       if (second_is_constant) {
7024         if (is_int32_value) {
7025           __ orq(first_reg, Immediate(static_cast<int32_t>(value)));
7026         } else {
7027           __ orq(first_reg, codegen_->LiteralInt64Address(value));
7028         }
7029       } else if (second.IsDoubleStackSlot()) {
7030         __ orq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
7031       } else {
7032         __ orq(first_reg, second.AsRegister<CpuRegister>());
7033       }
7034     } else {
7035       DCHECK(instruction->IsXor());
7036       if (second_is_constant) {
7037         if (is_int32_value) {
7038           __ xorq(first_reg, Immediate(static_cast<int32_t>(value)));
7039         } else {
7040           __ xorq(first_reg, codegen_->LiteralInt64Address(value));
7041         }
7042       } else if (second.IsDoubleStackSlot()) {
7043         __ xorq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
7044       } else {
7045         __ xorq(first_reg, second.AsRegister<CpuRegister>());
7046       }
7047     }
7048   }
7049 }
7050 
7051 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(
7052     HInstruction* instruction,
7053     Location out,
7054     uint32_t offset,
7055     Location maybe_temp,
7056     ReadBarrierOption read_barrier_option) {
7057   CpuRegister out_reg = out.AsRegister<CpuRegister>();
7058   if (read_barrier_option == kWithReadBarrier) {
7059     CHECK(kEmitCompilerReadBarrier);
7060     if (kUseBakerReadBarrier) {
7061       // Load with fast path based Baker's read barrier.
7062       // /* HeapReference<Object> */ out = *(out + offset)
7063       codegen_->GenerateFieldLoadWithBakerReadBarrier(
7064           instruction, out, out_reg, offset, /* needs_null_check= */ false);
7065     } else {
7066       // Load with slow path based read barrier.
7067       // Save the value of `out` into `maybe_temp` before overwriting it
7068       // in the following move operation, as we will need it for the
7069       // read barrier below.
7070       DCHECK(maybe_temp.IsRegister()) << maybe_temp;
7071       __ movl(maybe_temp.AsRegister<CpuRegister>(), out_reg);
7072       // /* HeapReference<Object> */ out = *(out + offset)
7073       __ movl(out_reg, Address(out_reg, offset));
7074       codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
7075     }
7076   } else {
7077     // Plain load with no read barrier.
7078     // /* HeapReference<Object> */ out = *(out + offset)
7079     __ movl(out_reg, Address(out_reg, offset));
7080     __ MaybeUnpoisonHeapReference(out_reg);
7081   }
7082 }
7083 
7084 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(
7085     HInstruction* instruction,
7086     Location out,
7087     Location obj,
7088     uint32_t offset,
7089     ReadBarrierOption read_barrier_option) {
7090   CpuRegister out_reg = out.AsRegister<CpuRegister>();
7091   CpuRegister obj_reg = obj.AsRegister<CpuRegister>();
7092   if (read_barrier_option == kWithReadBarrier) {
7093     CHECK(kEmitCompilerReadBarrier);
7094     if (kUseBakerReadBarrier) {
7095       // Load with fast path based Baker's read barrier.
7096       // /* HeapReference<Object> */ out = *(obj + offset)
7097       codegen_->GenerateFieldLoadWithBakerReadBarrier(
7098           instruction, out, obj_reg, offset, /* needs_null_check= */ false);
7099     } else {
7100       // Load with slow path based read barrier.
7101       // /* HeapReference<Object> */ out = *(obj + offset)
7102       __ movl(out_reg, Address(obj_reg, offset));
7103       codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
7104     }
7105   } else {
7106     // Plain load with no read barrier.
7107     // /* HeapReference<Object> */ out = *(obj + offset)
7108     __ movl(out_reg, Address(obj_reg, offset));
7109     __ MaybeUnpoisonHeapReference(out_reg);
7110   }
7111 }
7112 
7113 void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(
7114     HInstruction* instruction,
7115     Location root,
7116     const Address& address,
7117     Label* fixup_label,
7118     ReadBarrierOption read_barrier_option) {
7119   CpuRegister root_reg = root.AsRegister<CpuRegister>();
7120   if (read_barrier_option == kWithReadBarrier) {
7121     DCHECK(kEmitCompilerReadBarrier);
7122     if (kUseBakerReadBarrier) {
7123       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
7124       // Baker's read barriers are used:
7125       //
7126       //   root = obj.field;
7127       //   temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
7128       //   if (temp != null) {
7129       //     root = temp(root)
7130       //   }
7131 
7132       // /* GcRoot<mirror::Object> */ root = *address
7133       __ movl(root_reg, address);
7134       if (fixup_label != nullptr) {
7135         __ Bind(fixup_label);
7136       }
7137       static_assert(
7138           sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
7139           "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
7140           "have different sizes.");
7141       static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
7142                     "art::mirror::CompressedReference<mirror::Object> and int32_t "
7143                     "have different sizes.");
7144 
7145       // Slow path marking the GC root `root`.
7146       SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathX86_64(
7147           instruction, root, /* unpoison_ref_before_marking= */ false);
7148       codegen_->AddSlowPath(slow_path);
7149 
7150       // Test the `Thread::Current()->pReadBarrierMarkReg ## root.reg()` entrypoint.
7151       const int32_t entry_point_offset =
7152           Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(root.reg());
7153       __ gs()->cmpl(Address::Absolute(entry_point_offset, /* no_rip= */ true), Immediate(0));
7154       // The entrypoint is null when the GC is not marking.
7155       __ j(kNotEqual, slow_path->GetEntryLabel());
7156       __ Bind(slow_path->GetExitLabel());
7157     } else {
7158       // GC root loaded through a slow path for read barriers other
7159       // than Baker's.
7160       // /* GcRoot<mirror::Object>* */ root = address
7161       __ leaq(root_reg, address);
7162       if (fixup_label != nullptr) {
7163         __ Bind(fixup_label);
7164       }
7165       // /* mirror::Object* */ root = root->Read()
7166       codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
7167     }
7168   } else {
7169     // Plain GC root load with no read barrier.
7170     // /* GcRoot<mirror::Object> */ root = *address
7171     __ movl(root_reg, address);
7172     if (fixup_label != nullptr) {
7173       __ Bind(fixup_label);
7174     }
7175     // Note that GC roots are not affected by heap poisoning, thus we
7176     // do not have to unpoison `root_reg` here.
7177   }
7178 }
7179 
7180 void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
7181                                                                 Location ref,
7182                                                                 CpuRegister obj,
7183                                                                 uint32_t offset,
7184                                                                 bool needs_null_check) {
7185   DCHECK(kEmitCompilerReadBarrier);
7186   DCHECK(kUseBakerReadBarrier);
7187 
7188   // /* HeapReference<Object> */ ref = *(obj + offset)
7189   Address src(obj, offset);
7190   GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
7191 }
7192 
7193 void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
7194                                                                 Location ref,
7195                                                                 CpuRegister obj,
7196                                                                 uint32_t data_offset,
7197                                                                 Location index,
7198                                                                 bool needs_null_check) {
7199   DCHECK(kEmitCompilerReadBarrier);
7200   DCHECK(kUseBakerReadBarrier);
7201 
7202   static_assert(
7203       sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
7204       "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
7205   // /* HeapReference<Object> */ ref =
7206   //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
7207   Address src = CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset);
7208   GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
7209 }
7210 
7211 void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
7212                                                                     Location ref,
7213                                                                     CpuRegister obj,
7214                                                                     const Address& src,
7215                                                                     bool needs_null_check,
7216                                                                     bool always_update_field,
7217                                                                     CpuRegister* temp1,
7218                                                                     CpuRegister* temp2) {
7219   DCHECK(kEmitCompilerReadBarrier);
7220   DCHECK(kUseBakerReadBarrier);
7221 
7222   // In slow path based read barriers, the read barrier call is
7223   // inserted after the original load. However, in fast path based
7224   // Baker's read barriers, we need to perform the load of
7225   // mirror::Object::monitor_ *before* the original reference load.
7226   // This load-load ordering is required by the read barrier.
7227   // The fast path/slow path (for Baker's algorithm) should look like:
7228   //
7229   //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
7230   //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
7231   //   HeapReference<Object> ref = *src;  // Original reference load.
7232   //   bool is_gray = (rb_state == ReadBarrier::GrayState());
7233   //   if (is_gray) {
7234   //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
7235   //   }
7236   //
7237   // Note: the original implementation in ReadBarrier::Barrier is
7238   // slightly more complex as:
7239   // - it implements the load-load fence using a data dependency on
7240   //   the high-bits of rb_state, which are expected to be all zeroes
7241   //   (we use CodeGeneratorX86_64::GenerateMemoryBarrier instead
7242   //   here, which is a no-op thanks to the x86-64 memory model);
7243   // - it performs additional checks that we do not do here for
7244   //   performance reasons.
7245 
7246   CpuRegister ref_reg = ref.AsRegister<CpuRegister>();
7247   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
7248 
7249   // Given the numeric representation, it's enough to check the low bit of the rb_state.
7250   static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
7251   static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
7252   constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
7253   constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
7254   constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
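  // For example, with kReadBarrierStateShift == 28, the `testb` below reads byte 3 of the
  // lock word and tests bit 4 (test_value == 0x10), i.e. exactly the read barrier state bit.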
7255 
7256   // if (rb_state == ReadBarrier::GrayState())
7257   //   ref = ReadBarrier::Mark(ref);
7258   // At this point, just do the "if" and make sure that flags are preserved until the branch.
7259   __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
7260   if (needs_null_check) {
7261     MaybeRecordImplicitNullCheck(instruction);
7262   }
7263 
7264   // Load fence to prevent load-load reordering.
7265   // Note that this is a no-op, thanks to the x86-64 memory model.
7266   GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
7267 
7268   // The actual reference load.
7269   // /* HeapReference<Object> */ ref = *src
7270   __ movl(ref_reg, src);  // Flags are unaffected.
7271 
7272   // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch.
7273   // Slow path marking the object `ref` when it is gray.
7274   SlowPathCode* slow_path;
7275   if (always_update_field) {
7276     DCHECK(temp1 != nullptr);
7277     DCHECK(temp2 != nullptr);
7278     slow_path = new (GetScopedAllocator()) ReadBarrierMarkAndUpdateFieldSlowPathX86_64(
7279         instruction, ref, obj, src, /* unpoison_ref_before_marking= */ true, *temp1, *temp2);
7280   } else {
7281     slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathX86_64(
7282         instruction, ref, /* unpoison_ref_before_marking= */ true);
7283   }
7284   AddSlowPath(slow_path);
7285 
7286   // We have done the "if" of the gray bit check above, now branch based on the flags.
7287   __ j(kNotZero, slow_path->GetEntryLabel());
7288 
7289   // Object* ref = ref_addr->AsMirrorPtr()
7290   __ MaybeUnpoisonHeapReference(ref_reg);
7291 
7292   __ Bind(slow_path->GetExitLabel());
7293 }
7294 
void CodeGeneratorX86_64::GenerateReadBarrierSlow(HInstruction* instruction,
                                                  Location out,
                                                  Location ref,
                                                  Location obj,
                                                  uint32_t offset,
                                                  Location index) {
  DCHECK(kEmitCompilerReadBarrier);

  // Insert a slow path based read barrier *after* the reference load.
  //
  // If heap poisoning is enabled, the unpoisoning of the loaded
  // reference will be carried out by the runtime within the slow
  // path.
  //
  // Note that `ref` currently does not get unpoisoned (when heap
  // poisoning is enabled), which is alright as the `ref` argument is
  // not used by the artReadBarrierSlow entry point.
  //
  // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
  SlowPathCode* slow_path = new (GetScopedAllocator())
      ReadBarrierForHeapReferenceSlowPathX86_64(instruction, out, ref, obj, offset, index);
  AddSlowPath(slow_path);

  __ jmp(slow_path->GetEntryLabel());
  __ Bind(slow_path->GetExitLabel());
}

void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
                                                       Location out,
                                                       Location ref,
                                                       Location obj,
                                                       uint32_t offset,
                                                       Location index) {
  if (kEmitCompilerReadBarrier) {
    // Baker's read barriers shall be handled by the fast path
    // (CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier).
    DCHECK(!kUseBakerReadBarrier);
    // If heap poisoning is enabled, unpoisoning will be taken care of
    // by the runtime within the slow path.
    GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
  } else if (kPoisonHeapReferences) {
    __ UnpoisonHeapReference(out.AsRegister<CpuRegister>());
  }
}

void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
                                                         Location out,
                                                         Location root) {
  DCHECK(kEmitCompilerReadBarrier);

  // Insert a slow path based read barrier *after* the GC root load.
  //
  // Note that GC roots are not affected by heap poisoning, so we do
  // not need to do anything special for this here.
  SlowPathCode* slow_path =
      new (GetScopedAllocator()) ReadBarrierForRootSlowPathX86_64(instruction, out, root);
  AddSlowPath(slow_path);

  __ jmp(slow_path->GetEntryLabel());
  __ Bind(slow_path->GetExitLabel());
}

void LocationsBuilderX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
  // Nothing to do, this should be removed during prepare for register allocator.
  LOG(FATAL) << "Unreachable";
}

void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
  // Nothing to do, this should be removed during prepare for register allocator.
  LOG(FATAL) << "Unreachable";
}

// Simple implementation of packed switch - generate cascaded compare/jumps.
void LocationsBuilderX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
  LocationSummary* locations =
      new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
}

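// A rough, illustrative sketch of the two lowerings produced below: a small switch
// over {0, 1, 2} with lower_bound == 0 becomes a cascade of compare/jumps,
//
//     cmpl value, 1
//     jb   case_0
//     je   case_1
//     cmpl value, 2
//     je   case_2
//     jmp  default      // omitted when the default block is the fallthrough
//
// while a large switch is lowered to a bounds check plus an indirect jump through a
// table of 32-bit offsets in the constant area (see LiteralCaseTable and
// JumpTableRIPFixup below).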
void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
  int32_t lower_bound = switch_instr->GetStartValue();
  uint32_t num_entries = switch_instr->GetNumEntries();
  LocationSummary* locations = switch_instr->GetLocations();
  CpuRegister value_reg_in = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister temp_reg = locations->GetTemp(0).AsRegister<CpuRegister>();
  CpuRegister base_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
  HBasicBlock* default_block = switch_instr->GetDefaultBlock();

  // Should we generate smaller inline compare/jumps?
  if (num_entries <= kPackedSwitchJumpTableThreshold) {
    // Figure out the correct compare values and jump conditions.
    // Handle the first compare/branch as a special case because it might
    // jump to the default case.
    DCHECK_GT(num_entries, 2u);
    Condition first_condition;
    uint32_t index;
    const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
    if (lower_bound != 0) {
      first_condition = kLess;
      __ cmpl(value_reg_in, Immediate(lower_bound));
      __ j(first_condition, codegen_->GetLabelOf(default_block));
      __ j(kEqual, codegen_->GetLabelOf(successors[0]));

      index = 1;
    } else {
      // Handle all the compare/jumps below.
      first_condition = kBelow;
      index = 0;
    }

    // Handle the rest of the compare/jumps.
    for (; index + 1 < num_entries; index += 2) {
      int32_t compare_to_value = lower_bound + index + 1;
      __ cmpl(value_reg_in, Immediate(compare_to_value));
      // Jump to successors[index] if value < case_value[index].
      __ j(first_condition, codegen_->GetLabelOf(successors[index]));
      // Jump to successors[index + 1] if value == case_value[index + 1].
      __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
    }

    if (index != num_entries) {
      // There are an odd number of entries. Handle the last one.
      DCHECK_EQ(index + 1, num_entries);
      __ cmpl(value_reg_in, Immediate(static_cast<int32_t>(lower_bound + index)));
      __ j(kEqual, codegen_->GetLabelOf(successors[index]));
    }

    // And the default for any other value.
    if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
      __ jmp(codegen_->GetLabelOf(default_block));
    }
    return;
  }

  // Remove the bias, if needed.
  Register value_reg_out = value_reg_in.AsRegister();
  if (lower_bound != 0) {
    __ leal(temp_reg, Address(value_reg_in, -lower_bound));
    value_reg_out = temp_reg.AsRegister();
  }
  CpuRegister value_reg(value_reg_out);

  // Is the value in range?
  __ cmpl(value_reg, Immediate(num_entries - 1));
  __ j(kAbove, codegen_->GetLabelOf(default_block));

  // We are in the range of the table.
  // Load the address of the jump table in the constant area.
  __ leaq(base_reg, codegen_->LiteralCaseTable(switch_instr));

  // Load the (signed) offset from the jump table.
  __ movsxd(temp_reg, Address(base_reg, value_reg, TIMES_4, 0));

  // Add the offset to the address of the table base.
  __ addq(temp_reg, base_reg);

  // And jump.
  __ jmp(temp_reg);
}

void LocationsBuilderX86_64::VisitIntermediateAddress(HIntermediateAddress* instruction
                                                      ATTRIBUTE_UNUSED) {
  LOG(FATAL) << "Unreachable";
}

void InstructionCodeGeneratorX86_64::VisitIntermediateAddress(HIntermediateAddress* instruction
                                                              ATTRIBUTE_UNUSED) {
  LOG(FATAL) << "Unreachable";
}

void CodeGeneratorX86_64::Load32BitValue(CpuRegister dest, int32_t value) {
  if (value == 0) {
    __ xorl(dest, dest);
  } else {
    __ movl(dest, Immediate(value));
  }
}

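// For example (values are illustrative): loading 0 takes the xorl path below,
// 0x00000000FFFFFFFF fits in an unsigned 32-bit immediate and uses the shorter movl
// (which zero-extends into the upper half), while -1 or any value with bits set
// above bit 31 needs the full 64-bit movq immediate.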
void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) {
  if (value == 0) {
    // Clears upper bits too.
    __ xorl(dest, dest);
  } else if (IsUint<32>(value)) {
    // We can use a 32 bit move, as it will zero-extend and is shorter.
    __ movl(dest, Immediate(static_cast<int32_t>(value)));
  } else {
    __ movq(dest, Immediate(value));
  }
}

void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, int32_t value) {
  if (value == 0) {
    __ xorps(dest, dest);
  } else {
    __ movss(dest, LiteralInt32Address(value));
  }
}

void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, int64_t value) {
  if (value == 0) {
    __ xorpd(dest, dest);
  } else {
    __ movsd(dest, LiteralInt64Address(value));
  }
}

void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, float value) {
  Load32BitValue(dest, bit_cast<int32_t, float>(value));
}

void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, double value) {
  Load64BitValue(dest, bit_cast<int64_t, double>(value));
}

void CodeGeneratorX86_64::Compare32BitValue(CpuRegister dest, int32_t value) {
  if (value == 0) {
    __ testl(dest, dest);
  } else {
    __ cmpl(dest, Immediate(value));
  }
}

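// Note: x86-64 has no compare instruction with a 64-bit immediate, so values that do
// not fit in a sign-extended 32-bit immediate are compared against a constant-area
// literal instead (see LiteralInt64Address below).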
void CodeGeneratorX86_64::Compare64BitValue(CpuRegister dest, int64_t value) {
  if (IsInt<32>(value)) {
    if (value == 0) {
      __ testq(dest, dest);
    } else {
      __ cmpq(dest, Immediate(static_cast<int32_t>(value)));
    }
  } else {
    // Value won't fit in a 32-bit immediate, so compare against a constant-area literal.
    __ cmpq(dest, LiteralInt64Address(value));
  }
}

void CodeGeneratorX86_64::GenerateIntCompare(Location lhs, Location rhs) {
  CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>();
  GenerateIntCompare(lhs_reg, rhs);
}

void CodeGeneratorX86_64::GenerateIntCompare(CpuRegister lhs, Location rhs) {
  if (rhs.IsConstant()) {
    int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
    Compare32BitValue(lhs, value);
  } else if (rhs.IsStackSlot()) {
    __ cmpl(lhs, Address(CpuRegister(RSP), rhs.GetStackIndex()));
  } else {
    __ cmpl(lhs, rhs.AsRegister<CpuRegister>());
  }
}

void CodeGeneratorX86_64::GenerateLongCompare(Location lhs, Location rhs) {
  CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>();
  if (rhs.IsConstant()) {
    int64_t value = rhs.GetConstant()->AsLongConstant()->GetValue();
    Compare64BitValue(lhs_reg, value);
  } else if (rhs.IsDoubleStackSlot()) {
    __ cmpq(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
  } else {
    __ cmpq(lhs_reg, rhs.AsRegister<CpuRegister>());
  }
}

Address CodeGeneratorX86_64::ArrayAddress(CpuRegister obj,
                                          Location index,
                                          ScaleFactor scale,
                                          uint32_t data_offset) {
  return index.IsConstant() ?
      Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) :
      Address(obj, index.AsRegister<CpuRegister>(), scale, data_offset);
}

void CodeGeneratorX86_64::Store64BitValueToStack(Location dest, int64_t value) {
  DCHECK(dest.IsDoubleStackSlot());
  if (IsInt<32>(value)) {
    // Can move directly as an int32 constant.
    __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()),
            Immediate(static_cast<int32_t>(value)));
  } else {
    Load64BitValue(CpuRegister(TMP), value);
    __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()), CpuRegister(TMP));
  }
}

/**
 * Class to handle late fixup of offsets into constant area.
 */
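// For instance (illustrative), a `movsd xmm0, [RIP + <placeholder>]` emitted via
// LiteralDoubleAddress() carries a dummy 32-bit displacement; Process() later
// rewrites those four bytes with the distance from the end of the instruction to
// the constant's slot in the constant area.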
class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
 public:
  RIPFixup(CodeGeneratorX86_64& codegen, size_t offset)
      : codegen_(&codegen), offset_into_constant_area_(offset) {}

 protected:
  void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }

  CodeGeneratorX86_64* codegen_;

 private:
  void Process(const MemoryRegion& region, int pos) override {
    // Patch the correct offset for the instruction.  We use the address of the
    // 'next' instruction, which is 'pos' (patch the 4 bytes before).
    int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
    int32_t relative_position = constant_offset - pos;

    // Patch in the right value.
    region.StoreUnaligned<int32_t>(pos - 4, relative_position);
  }

  // Location in constant area that the fixup refers to.
  size_t offset_into_constant_area_;
};

/**
 * Class to handle late fixup of offsets to a jump table that will be created in the
 * constant area.
 */
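// Each table entry is a signed 32-bit offset from the start of the table to the
// target block; the jump-table path in VisitPackedSwitch loads an entry with
// movsxd, adds the table base back in, and jumps through the result.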
class JumpTableRIPFixup : public RIPFixup {
 public:
  JumpTableRIPFixup(CodeGeneratorX86_64& codegen, HPackedSwitch* switch_instr)
      : RIPFixup(codegen, -1), switch_instr_(switch_instr) {}

  void CreateJumpTable() {
    X86_64Assembler* assembler = codegen_->GetAssembler();

    // Ensure that the reference to the jump table has the correct offset.
    const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
    SetOffset(offset_in_constant_table);

    // Compute the offset from the start of the function to this jump table.
    const int32_t current_table_offset = assembler->CodeSize() + offset_in_constant_table;

    // Populate the jump table with the correct offsets.
    int32_t num_entries = switch_instr_->GetNumEntries();
    HBasicBlock* block = switch_instr_->GetBlock();
    const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
    // The value that we want is the target offset - the position of the table.
    for (int32_t i = 0; i < num_entries; i++) {
      HBasicBlock* b = successors[i];
      Label* l = codegen_->GetLabelOf(b);
      DCHECK(l->IsBound());
      int32_t offset_to_block = l->Position() - current_table_offset;
      assembler->AppendInt32(offset_to_block);
    }
  }

 private:
  const HPackedSwitch* switch_instr_;
};

void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) {
  // Generate the constant area if needed.
  X86_64Assembler* assembler = GetAssembler();
  if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
    // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8 byte values.
    assembler->Align(4, 0);
    constant_area_start_ = assembler->CodeSize();

    // Populate any jump tables.
    for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) {
      jump_table->CreateJumpTable();
    }

    // And now add the constant area to the generated code.
    assembler->AddConstantArea();
  }

  // And finish up.
  CodeGenerator::Finalize(allocator);
}

Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) {
  AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddDouble(v));
  return Address::RIP(fixup);
}

Address CodeGeneratorX86_64::LiteralFloatAddress(float v) {
  AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddFloat(v));
  return Address::RIP(fixup);
}

Address CodeGeneratorX86_64::LiteralInt32Address(int32_t v) {
  AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddInt32(v));
  return Address::RIP(fixup);
}

Address CodeGeneratorX86_64::LiteralInt64Address(int64_t v) {
  AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddInt64(v));
  return Address::RIP(fixup);
}

// TODO: trg as memory.
void CodeGeneratorX86_64::MoveFromReturnRegister(Location trg, DataType::Type type) {
  if (!trg.IsValid()) {
    DCHECK_EQ(type, DataType::Type::kVoid);
    return;
  }

  DCHECK_NE(type, DataType::Type::kVoid);

  Location return_loc = InvokeDexCallingConventionVisitorX86_64().GetReturnLocation(type);
  if (trg.Equals(return_loc)) {
    return;
  }

  // Let the parallel move resolver take care of all of this.
  HParallelMove parallel_move(GetGraph()->GetAllocator());
  parallel_move.AddMove(return_loc, trg, type, nullptr);
  GetMoveResolver()->EmitNativeCode(&parallel_move);
}

Address CodeGeneratorX86_64::LiteralCaseTable(HPackedSwitch* switch_instr) {
  // Create a fixup to be used to create and address the jump table.
  JumpTableRIPFixup* table_fixup =
      new (GetGraph()->GetAllocator()) JumpTableRIPFixup(*this, switch_instr);

  // Remember the fixup; the table itself is populated later, in Finalize().
  fixups_to_jump_tables_.push_back(table_fixup);
  return Address::RIP(table_fixup);
}

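// Stores a 64-bit immediate to memory, split across `addr_low`/`addr_high` when
// needed. A sketch of the two cases handled below: values that fit in a
// sign-extended 32-bit immediate use a single movq; otherwise the value is written
// as two 32-bit halves, and the implicit null check (if any) is recorded against
// the first store emitted.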
void CodeGeneratorX86_64::MoveInt64ToAddress(const Address& addr_low,
                                             const Address& addr_high,
                                             int64_t v,
                                             HInstruction* instruction) {
  if (IsInt<32>(v)) {
    int32_t v_32 = v;
    __ movq(addr_low, Immediate(v_32));
    MaybeRecordImplicitNullCheck(instruction);
  } else {
    // Didn't fit in a 32-bit signed immediate.  Do it in pieces.
    int32_t low_v = Low32Bits(v);
    int32_t high_v = High32Bits(v);
    __ movl(addr_low, Immediate(low_v));
    MaybeRecordImplicitNullCheck(instruction);
    __ movl(addr_high, Immediate(high_v));
  }
}

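// JIT root patching: the generated code references a string/class root through a
// 32-bit placeholder, which is rewritten here once the JIT root table is allocated.
// A sketch of the address computation (names as in the code below):
//
//   address = roots_data + index_in_table * sizeof(GcRoot<mirror::Object>)
//
// The result is then narrowed (with a debug-checked cast) to 32 bits and stored at
// the patch site.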
void CodeGeneratorX86_64::PatchJitRootUse(uint8_t* code,
                                          const uint8_t* roots_data,
                                          const PatchInfo<Label>& info,
                                          uint64_t index_in_table) const {
  uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
  uintptr_t address =
      reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
  using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t;
  reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
      dchecked_integral_cast<uint32_t>(address);
}

void CodeGeneratorX86_64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
  for (const PatchInfo<Label>& info : jit_string_patches_) {
    StringReference string_reference(info.target_dex_file, dex::StringIndex(info.offset_or_index));
    uint64_t index_in_table = GetJitStringRootIndex(string_reference);
    PatchJitRootUse(code, roots_data, info, index_in_table);
  }

  for (const PatchInfo<Label>& info : jit_class_patches_) {
    TypeReference type_reference(info.target_dex_file, dex::TypeIndex(info.offset_or_index));
    uint64_t index_in_table = GetJitClassRootIndex(type_reference);
    PatchJitRootUse(code, roots_data, info, index_in_table);
  }
}

bool LocationsBuilderX86_64::CpuHasAvxFeatureFlag() {
  return codegen_->GetInstructionSetFeatures().HasAVX();
}

bool LocationsBuilderX86_64::CpuHasAvx2FeatureFlag() {
  return codegen_->GetInstructionSetFeatures().HasAVX2();
}

bool InstructionCodeGeneratorX86_64::CpuHasAvxFeatureFlag() {
  return codegen_->GetInstructionSetFeatures().HasAVX();
}

bool InstructionCodeGeneratorX86_64::CpuHasAvx2FeatureFlag() {
  return codegen_->GetInstructionSetFeatures().HasAVX2();
}

#undef __

}  // namespace x86_64
}  // namespace art