1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "code_generator_x86_64.h"
18 
19 #include "art_method.h"
20 #include "class_table.h"
21 #include "code_generator_utils.h"
22 #include "compiled_method.h"
23 #include "entrypoints/quick/quick_entrypoints.h"
24 #include "gc/accounting/card_table.h"
25 #include "gc/space/image_space.h"
26 #include "heap_poisoning.h"
27 #include "intrinsics.h"
28 #include "intrinsics_x86_64.h"
29 #include "linker/linker_patch.h"
30 #include "lock_word.h"
31 #include "mirror/array-inl.h"
32 #include "mirror/class-inl.h"
33 #include "mirror/object_reference.h"
34 #include "thread.h"
35 #include "utils/assembler.h"
36 #include "utils/stack_checks.h"
37 #include "utils/x86_64/assembler_x86_64.h"
38 #include "utils/x86_64/managed_register_x86_64.h"
39 
40 namespace art {
41 
42 template<class MirrorType>
43 class GcRoot;
44 
45 namespace x86_64 {
46 
47 static constexpr int kCurrentMethodStackOffset = 0;
48 static constexpr Register kMethodRegisterArgument = RDI;
49 // The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump
50 // table version generates 7 instructions and num_entries literals. The compare/jump sequence
51 // generates less code/data when num_entries is small.
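// For example, at num_entries == 5 the compare/jump sequence costs roughly 7-8 instructions,
// about the same as the 7 instructions plus 5 table literals of a jump table.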
52 static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;
53 
54 static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 };
55 static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 };
56 
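// Mask for the C2 flag (bit 10) of the x87 FPU status word; fprem keeps C2 set while the
// partial-remainder reduction is still incomplete, so the fprem loop must be retried.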
57 static constexpr int kC2ConditionMask = 0x400;
58 
59 static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
60   // Custom calling convention: RAX serves as both input and output.
61   RegisterSet caller_saves = RegisterSet::Empty();
62   caller_saves.Add(Location::RegisterLocation(RAX));
63   return caller_saves;
64 }
65 
66 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
67 #define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())->  // NOLINT
68 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize, x).Int32Value()
69 
70 class NullCheckSlowPathX86_64 : public SlowPathCode {
71  public:
72   explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : SlowPathCode(instruction) {}
73 
74   void EmitNativeCode(CodeGenerator* codegen) override {
75     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
76     __ Bind(GetEntryLabel());
77     if (instruction_->CanThrowIntoCatchBlock()) {
78       // Live registers will be restored in the catch block if caught.
79       SaveLiveRegisters(codegen, instruction_->GetLocations());
80     }
81     x86_64_codegen->InvokeRuntime(kQuickThrowNullPointer,
82                                   instruction_,
83                                   instruction_->GetDexPc(),
84                                   this);
85     CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
86   }
87 
88   bool IsFatal() const override { return true; }
89 
90   const char* GetDescription() const override { return "NullCheckSlowPathX86_64"; }
91 
92  private:
93   DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86_64);
94 };
95 
96 class DivZeroCheckSlowPathX86_64 : public SlowPathCode {
97  public:
98   explicit DivZeroCheckSlowPathX86_64(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}
99 
100   void EmitNativeCode(CodeGenerator* codegen) override {
101     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
102     __ Bind(GetEntryLabel());
103     x86_64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
104     CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
105   }
106 
107   bool IsFatal() const override { return true; }
108 
109   const char* GetDescription() const override { return "DivZeroCheckSlowPathX86_64"; }
110 
111  private:
112   DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86_64);
113 };
114 
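// Slow path for integer Div/Rem when the divisor is -1: `idiv` would raise a divide-error
// exception for kMinInt / -1 (and kMinLong / -1), so that case is handled here instead.
// Since x / -1 == -x and x % -1 == 0, the output register is negated for a division and
// zeroed for a remainder.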
115 class DivRemMinusOneSlowPathX86_64 : public SlowPathCode {
116  public:
117   DivRemMinusOneSlowPathX86_64(HInstruction* at, Register reg, DataType::Type type, bool is_div)
118       : SlowPathCode(at), cpu_reg_(CpuRegister(reg)), type_(type), is_div_(is_div) {}
119 
120   void EmitNativeCode(CodeGenerator* codegen) override {
121     __ Bind(GetEntryLabel());
122     if (type_ == DataType::Type::kInt32) {
123       if (is_div_) {
124         __ negl(cpu_reg_);
125       } else {
126         __ xorl(cpu_reg_, cpu_reg_);
127       }
128 
129     } else {
130       DCHECK_EQ(DataType::Type::kInt64, type_);
131       if (is_div_) {
132         __ negq(cpu_reg_);
133       } else {
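        // A 32-bit xor is sufficient: on x86-64, writing a 32-bit register zero-extends
        // into the full 64-bit register.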
134         __ xorl(cpu_reg_, cpu_reg_);
135       }
136     }
137     __ jmp(GetExitLabel());
138   }
139 
140   const char* GetDescription() const override { return "DivRemMinusOneSlowPathX86_64"; }
141 
142  private:
143   const CpuRegister cpu_reg_;
144   const DataType::Type type_;
145   const bool is_div_;
146   DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86_64);
147 };
148 
149 class SuspendCheckSlowPathX86_64 : public SlowPathCode {
150  public:
151   SuspendCheckSlowPathX86_64(HSuspendCheck* instruction, HBasicBlock* successor)
152       : SlowPathCode(instruction), successor_(successor) {}
153 
154   void EmitNativeCode(CodeGenerator* codegen) override {
155     LocationSummary* locations = instruction_->GetLocations();
156     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
157     __ Bind(GetEntryLabel());
158     SaveLiveRegisters(codegen, locations);  // Only saves full width XMM for SIMD.
159     x86_64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
160     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
161     RestoreLiveRegisters(codegen, locations);  // Only restores full width XMM for SIMD.
162     if (successor_ == nullptr) {
163       __ jmp(GetReturnLabel());
164     } else {
165       __ jmp(x86_64_codegen->GetLabelOf(successor_));
166     }
167   }
168 
169   Label* GetReturnLabel() {
170     DCHECK(successor_ == nullptr);
171     return &return_label_;
172   }
173 
174   HBasicBlock* GetSuccessor() const {
175     return successor_;
176   }
177 
178   const char* GetDescription() const override { return "SuspendCheckSlowPathX86_64"; }
179 
180  private:
181   HBasicBlock* const successor_;
182   Label return_label_;
183 
184   DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86_64);
185 };
186 
187 class BoundsCheckSlowPathX86_64 : public SlowPathCode {
188  public:
189   explicit BoundsCheckSlowPathX86_64(HBoundsCheck* instruction)
190     : SlowPathCode(instruction) {}
191 
192   void EmitNativeCode(CodeGenerator* codegen) override {
193     LocationSummary* locations = instruction_->GetLocations();
194     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
195     __ Bind(GetEntryLabel());
196     if (instruction_->CanThrowIntoCatchBlock()) {
197       // Live registers will be restored in the catch block if caught.
198       SaveLiveRegisters(codegen, instruction_->GetLocations());
199     }
200     // Are we using an array length from memory?
201     HInstruction* array_length = instruction_->InputAt(1);
202     Location length_loc = locations->InAt(1);
203     InvokeRuntimeCallingConvention calling_convention;
204     if (array_length->IsArrayLength() && array_length->IsEmittedAtUseSite()) {
205       // Load the array length into our temporary.
206       HArrayLength* length = array_length->AsArrayLength();
207       uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(length);
208       Location array_loc = array_length->GetLocations()->InAt(0);
209       Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
210       length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
211       // Check for conflicts with index.
212       if (length_loc.Equals(locations->InAt(0))) {
213         // We know we aren't using parameter 2.
214         length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2));
215       }
216       __ movl(length_loc.AsRegister<CpuRegister>(), array_len);
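      // With string compression, the length field is stored shifted left by one with the
      // compression flag in the low bit, so shift it out to recover the character count.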
217       if (mirror::kUseStringCompression && length->IsStringLength()) {
218         __ shrl(length_loc.AsRegister<CpuRegister>(), Immediate(1));
219       }
220     }
221 
222     // We're moving two locations to locations that could overlap, so we need a parallel
223     // move resolver.
224     codegen->EmitParallelMoves(
225         locations->InAt(0),
226         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
227         DataType::Type::kInt32,
228         length_loc,
229         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
230         DataType::Type::kInt32);
231     QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
232         ? kQuickThrowStringBounds
233         : kQuickThrowArrayBounds;
234     x86_64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
235     CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
236     CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
237   }
238 
239   bool IsFatal() const override { return true; }
240 
241   const char* GetDescription() const override { return "BoundsCheckSlowPathX86_64"; }
242 
243  private:
244   DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86_64);
245 };
246 
247 class LoadClassSlowPathX86_64 : public SlowPathCode {
248  public:
249   LoadClassSlowPathX86_64(HLoadClass* cls, HInstruction* at)
250       : SlowPathCode(at), cls_(cls) {
251     DCHECK(at->IsLoadClass() || at->IsClinitCheck());
252     DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
253   }
254 
255   void EmitNativeCode(CodeGenerator* codegen) override {
256     LocationSummary* locations = instruction_->GetLocations();
257     Location out = locations->Out();
258     const uint32_t dex_pc = instruction_->GetDexPc();
259     bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
260     bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();
261 
262     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
263     __ Bind(GetEntryLabel());
264     SaveLiveRegisters(codegen, locations);
265 
266     // Custom calling convention: RAX serves as both input and output.
267     if (must_resolve_type) {
268       DCHECK(IsSameDexFile(cls_->GetDexFile(), x86_64_codegen->GetGraph()->GetDexFile()));
269       dex::TypeIndex type_index = cls_->GetTypeIndex();
270       __ movl(CpuRegister(RAX), Immediate(type_index.index_));
271       x86_64_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
272       CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
273       // If we also must_do_clinit, the resolved type is now in the correct register.
274     } else {
275       DCHECK(must_do_clinit);
276       Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
277       x86_64_codegen->Move(Location::RegisterLocation(RAX), source);
278     }
279     if (must_do_clinit) {
280       x86_64_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
281       CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
282     }
283 
284     // Move the class to the desired location.
285     if (out.IsValid()) {
286       DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
287       x86_64_codegen->Move(out, Location::RegisterLocation(RAX));
288     }
289 
290     RestoreLiveRegisters(codegen, locations);
291     __ jmp(GetExitLabel());
292   }
293 
294   const char* GetDescription() const override { return "LoadClassSlowPathX86_64"; }
295 
296  private:
297   // The class this slow path will load.
298   HLoadClass* const cls_;
299 
300   DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86_64);
301 };
302 
303 class LoadStringSlowPathX86_64 : public SlowPathCode {
304  public:
305   explicit LoadStringSlowPathX86_64(HLoadString* instruction) : SlowPathCode(instruction) {}
306 
307   void EmitNativeCode(CodeGenerator* codegen) override {
308     LocationSummary* locations = instruction_->GetLocations();
309     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
310 
311     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
312     __ Bind(GetEntryLabel());
313     SaveLiveRegisters(codegen, locations);
314 
315     const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
316     // Custom calling convention: RAX serves as both input and output.
317     __ movl(CpuRegister(RAX), Immediate(string_index.index_));
318     x86_64_codegen->InvokeRuntime(kQuickResolveString,
319                                   instruction_,
320                                   instruction_->GetDexPc(),
321                                   this);
322     CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
323     x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
324     RestoreLiveRegisters(codegen, locations);
325 
326     __ jmp(GetExitLabel());
327   }
328 
329   const char* GetDescription() const override { return "LoadStringSlowPathX86_64"; }
330 
331  private:
332   DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86_64);
333 };
334 
335 class TypeCheckSlowPathX86_64 : public SlowPathCode {
336  public:
337   TypeCheckSlowPathX86_64(HInstruction* instruction, bool is_fatal)
338       : SlowPathCode(instruction), is_fatal_(is_fatal) {}
339 
340   void EmitNativeCode(CodeGenerator* codegen) override {
341     LocationSummary* locations = instruction_->GetLocations();
342     uint32_t dex_pc = instruction_->GetDexPc();
343     DCHECK(instruction_->IsCheckCast()
344            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
345 
346     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
347     __ Bind(GetEntryLabel());
348 
349     if (kPoisonHeapReferences &&
350         instruction_->IsCheckCast() &&
351         instruction_->AsCheckCast()->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) {
352       // First, unpoison the `cls` reference that was poisoned for direct memory comparison.
353       __ UnpoisonHeapReference(locations->InAt(1).AsRegister<CpuRegister>());
354     }
355 
356     if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
357       SaveLiveRegisters(codegen, locations);
358     }
359 
360     // We're moving two locations to locations that could overlap, so we need a parallel
361     // move resolver.
362     InvokeRuntimeCallingConvention calling_convention;
363     codegen->EmitParallelMoves(locations->InAt(0),
364                                Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
365                                DataType::Type::kReference,
366                                locations->InAt(1),
367                                Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
368                                DataType::Type::kReference);
369     if (instruction_->IsInstanceOf()) {
370       x86_64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
371       CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
372     } else {
373       DCHECK(instruction_->IsCheckCast());
374       x86_64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
375       CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
376     }
377 
378     if (!is_fatal_) {
379       if (instruction_->IsInstanceOf()) {
380         x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
381       }
382 
383       RestoreLiveRegisters(codegen, locations);
384       __ jmp(GetExitLabel());
385     }
386   }
387 
388   const char* GetDescription() const override { return "TypeCheckSlowPathX86_64"; }
389 
390   bool IsFatal() const override { return is_fatal_; }
391 
392  private:
393   const bool is_fatal_;
394 
395   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86_64);
396 };
397 
398 class DeoptimizationSlowPathX86_64 : public SlowPathCode {
399  public:
400   explicit DeoptimizationSlowPathX86_64(HDeoptimize* instruction)
401       : SlowPathCode(instruction) {}
402 
403   void EmitNativeCode(CodeGenerator* codegen) override {
404     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
405     __ Bind(GetEntryLabel());
406     LocationSummary* locations = instruction_->GetLocations();
407     SaveLiveRegisters(codegen, locations);
408     InvokeRuntimeCallingConvention calling_convention;
409     x86_64_codegen->Load32BitValue(
410         CpuRegister(calling_convention.GetRegisterAt(0)),
411         static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
412     x86_64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
413     CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
414   }
415 
416   const char* GetDescription() const override { return "DeoptimizationSlowPathX86_64"; }
417 
418  private:
419   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64);
420 };
421 
422 class ArraySetSlowPathX86_64 : public SlowPathCode {
423  public:
424   explicit ArraySetSlowPathX86_64(HInstruction* instruction) : SlowPathCode(instruction) {}
425 
426   void EmitNativeCode(CodeGenerator* codegen) override {
427     LocationSummary* locations = instruction_->GetLocations();
428     __ Bind(GetEntryLabel());
429     SaveLiveRegisters(codegen, locations);
430 
431     InvokeRuntimeCallingConvention calling_convention;
432     HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
433     parallel_move.AddMove(
434         locations->InAt(0),
435         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
436         DataType::Type::kReference,
437         nullptr);
438     parallel_move.AddMove(
439         locations->InAt(1),
440         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
441         DataType::Type::kInt32,
442         nullptr);
443     parallel_move.AddMove(
444         locations->InAt(2),
445         Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
446         DataType::Type::kReference,
447         nullptr);
448     codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
449 
450     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
451     x86_64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
452     CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
453     RestoreLiveRegisters(codegen, locations);
454     __ jmp(GetExitLabel());
455   }
456 
457   const char* GetDescription() const override { return "ArraySetSlowPathX86_64"; }
458 
459  private:
460   DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64);
461 };
462 
463 // Slow path marking an object reference `ref` during a read
464 // barrier. The field `obj.field` in the object `obj` holding this
465 // reference does not get updated by this slow path after marking (see
466 // ReadBarrierMarkAndUpdateFieldSlowPathX86_64 below for that).
467 //
468 // This means that after the execution of this slow path, `ref` will
469 // always be up-to-date, but `obj.field` may not; i.e., after the
470 // flip, `ref` will be a to-space reference, but `obj.field` will
471 // probably still be a from-space reference (unless it gets updated by
472 // another thread, or if another thread installed another object
473 // reference (different from `ref`) in `obj.field`).
474 class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
475  public:
476   ReadBarrierMarkSlowPathX86_64(HInstruction* instruction,
477                                 Location ref,
478                                 bool unpoison_ref_before_marking)
479       : SlowPathCode(instruction),
480         ref_(ref),
481         unpoison_ref_before_marking_(unpoison_ref_before_marking) {
482     DCHECK(kEmitCompilerReadBarrier);
483   }
484 
485   const char* GetDescription() const override { return "ReadBarrierMarkSlowPathX86_64"; }
486 
487   void EmitNativeCode(CodeGenerator* codegen) override {
488     LocationSummary* locations = instruction_->GetLocations();
489     CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
490     Register ref_reg = ref_cpu_reg.AsRegister();
491     DCHECK(locations->CanCall());
492     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
493     DCHECK(instruction_->IsInstanceFieldGet() ||
494            instruction_->IsStaticFieldGet() ||
495            instruction_->IsArrayGet() ||
496            instruction_->IsArraySet() ||
497            instruction_->IsLoadClass() ||
498            instruction_->IsLoadString() ||
499            instruction_->IsInstanceOf() ||
500            instruction_->IsCheckCast() ||
501            (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
502            (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
503         << "Unexpected instruction in read barrier marking slow path: "
504         << instruction_->DebugName();
505 
506     __ Bind(GetEntryLabel());
507     if (unpoison_ref_before_marking_) {
508       // Object* ref = ref_addr->AsMirrorPtr()
509       __ MaybeUnpoisonHeapReference(ref_cpu_reg);
510     }
511     // No need to save live registers; it's taken care of by the
512     // entrypoint. Also, there is no need to update the stack mask,
513     // as this runtime call will not trigger a garbage collection.
514     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
515     DCHECK_NE(ref_reg, RSP);
516     DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
517     // "Compact" slow path, saving two moves.
518     //
519     // Instead of using the standard runtime calling convention (input
520     // in RDI and output in RAX):
521     //
522     //   RDI <- ref
523     //   RAX <- ReadBarrierMark(RDI)
524     //   ref <- RAX
525     //
526     // we just use rX (the register containing `ref`) as input and output
527     // of a dedicated entrypoint:
528     //
529     //   rX <- ReadBarrierMarkRegX(rX)
530     //
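    // There is one such entrypoint per core register (pReadBarrierMarkRegXX in the
    // Thread's entrypoint table), selected here by the register number of `ref`.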
531     int32_t entry_point_offset =
532         Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
533     // This runtime call does not require a stack map.
534     x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
535     __ jmp(GetExitLabel());
536   }
537 
538  private:
539   // The location (register) of the marked object reference.
540   const Location ref_;
541   // Should the reference in `ref_` be unpoisoned prior to marking it?
542   const bool unpoison_ref_before_marking_;
543 
544   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64);
545 };
546 
547 // Slow path marking an object reference `ref` during a read barrier,
548 // and if needed, atomically updating the field `obj.field` in the
549 // object `obj` holding this reference after marking (contrary to
550 // ReadBarrierMarkSlowPathX86_64 above, which never tries to update
551 // `obj.field`).
552 //
553 // This means that after the execution of this slow path, both `ref`
554 // and `obj.field` will be up-to-date; i.e., after the flip, both will
555 // hold the same to-space reference (unless another thread installed
556 // another object reference (different from `ref`) in `obj.field`).
557 class ReadBarrierMarkAndUpdateFieldSlowPathX86_64 : public SlowPathCode {
558  public:
559   ReadBarrierMarkAndUpdateFieldSlowPathX86_64(HInstruction* instruction,
560                                               Location ref,
561                                               CpuRegister obj,
562                                               const Address& field_addr,
563                                               bool unpoison_ref_before_marking,
564                                               CpuRegister temp1,
565                                               CpuRegister temp2)
566       : SlowPathCode(instruction),
567         ref_(ref),
568         obj_(obj),
569         field_addr_(field_addr),
570         unpoison_ref_before_marking_(unpoison_ref_before_marking),
571         temp1_(temp1),
572         temp2_(temp2) {
573     DCHECK(kEmitCompilerReadBarrier);
574   }
575 
576   const char* GetDescription() const override {
577     return "ReadBarrierMarkAndUpdateFieldSlowPathX86_64";
578   }
579 
580   void EmitNativeCode(CodeGenerator* codegen) override {
581     LocationSummary* locations = instruction_->GetLocations();
582     CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
583     Register ref_reg = ref_cpu_reg.AsRegister();
584     DCHECK(locations->CanCall());
585     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
586     // This slow path is only used by the UnsafeCASObject intrinsic.
587     DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
588         << "Unexpected instruction in read barrier marking and field updating slow path: "
589         << instruction_->DebugName();
590     DCHECK(instruction_->GetLocations()->Intrinsified());
591     DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
592 
593     __ Bind(GetEntryLabel());
594     if (unpoison_ref_before_marking_) {
595       // Object* ref = ref_addr->AsMirrorPtr()
596       __ MaybeUnpoisonHeapReference(ref_cpu_reg);
597     }
598 
599     // Save the old (unpoisoned) reference.
600     __ movl(temp1_, ref_cpu_reg);
601 
602     // No need to save live registers; it's taken care of by the
603     // entrypoint. Also, there is no need to update the stack mask,
604     // as this runtime call will not trigger a garbage collection.
605     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
606     DCHECK_NE(ref_reg, RSP);
607     DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
608     // "Compact" slow path, saving two moves.
609     //
610     // Instead of using the standard runtime calling convention (input
611     // in RDI and output in RAX):
612     //
613     //   RDI <- ref
614     //   RAX <- ReadBarrierMark(RDI)
615     //   ref <- RAX
616     //
617     // we just use rX (the register containing `ref`) as input and output
618     // of a dedicated entrypoint:
619     //
620     //   rX <- ReadBarrierMarkRegX(rX)
621     //
622     int32_t entry_point_offset =
623         Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
624     // This runtime call does not require a stack map.
625     x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
626 
627     // If the new reference is different from the old reference,
628     // update the field in the holder (`*field_addr`).
629     //
630     // Note that this field could also hold a different object, if
631     // another thread had concurrently changed it. In that case, the
632     // LOCK CMPXCHGL instruction in the compare-and-set (CAS)
633     // operation below would abort the CAS, leaving the field as-is.
634     NearLabel done;
635     __ cmpl(temp1_, ref_cpu_reg);
636     __ j(kEqual, &done);
637 
638     // Update the holder's field atomically.  This may fail if the
639     // mutator updates it before us, but that's OK.  This is achieved
640     // using a strong compare-and-set (CAS) operation with relaxed
641     // memory synchronization ordering, where the expected value is
642     // the old reference and the desired value is the new reference.
643     // This operation is implemented with a 32-bit LOCK CMPXCHGL
644     // instruction, which requires the expected value (the old
645     // reference) to be in EAX.  Save RAX beforehand, and move the
646     // expected value (stored in `temp1_`) into EAX.
647     __ movq(temp2_, CpuRegister(RAX));
648     __ movl(CpuRegister(RAX), temp1_);
649 
650     // Convenience aliases.
651     CpuRegister base = obj_;
652     CpuRegister expected = CpuRegister(RAX);
653     CpuRegister value = ref_cpu_reg;
654 
655     bool base_equals_value = (base.AsRegister() == value.AsRegister());
656     Register value_reg = ref_reg;
657     if (kPoisonHeapReferences) {
658       if (base_equals_value) {
659         // If `base` and `value` are the same register location, move
660         // `value_reg` to a temporary register.  This way, poisoning
661         // `value_reg` won't invalidate `base`.
662         value_reg = temp1_.AsRegister();
663         __ movl(CpuRegister(value_reg), base);
664       }
665 
666       // Check that the register allocator did not assign the location
667       // of `expected` (RAX) to `value` nor to `base`, so that heap
668       // poisoning (when enabled) works as intended below.
669       // - If `value` were equal to `expected`, both references would
670       //   be poisoned twice, meaning they would not be poisoned at
671       //   all, as heap poisoning uses address negation.
672       // - If `base` were equal to `expected`, poisoning `expected`
673       //   would invalidate `base`.
674       DCHECK_NE(value_reg, expected.AsRegister());
675       DCHECK_NE(base.AsRegister(), expected.AsRegister());
676 
677       __ PoisonHeapReference(expected);
678       __ PoisonHeapReference(CpuRegister(value_reg));
679     }
680 
681     __ LockCmpxchgl(field_addr_, CpuRegister(value_reg));
682 
683     // If heap poisoning is enabled, we need to unpoison the values
684     // that were poisoned earlier.
685     if (kPoisonHeapReferences) {
686       if (base_equals_value) {
687         // `value_reg` has been moved to a temporary register, no need
688         // to unpoison it.
689       } else {
690         __ UnpoisonHeapReference(CpuRegister(value_reg));
691       }
692       // No need to unpoison `expected` (RAX), as it will be overwritten below.
693     }
694 
695     // Restore RAX.
696     __ movq(CpuRegister(RAX), temp2_);
697 
698     __ Bind(&done);
699     __ jmp(GetExitLabel());
700   }
701 
702  private:
703   // The location (register) of the marked object reference.
704   const Location ref_;
705   // The register containing the object holding the marked object reference field.
706   const CpuRegister obj_;
707   // The address of the marked reference field.  The base of this address must be `obj_`.
708   const Address field_addr_;
709 
710   // Should the reference in `ref_` be unpoisoned prior to marking it?
711   const bool unpoison_ref_before_marking_;
712 
713   const CpuRegister temp1_;
714   const CpuRegister temp2_;
715 
716   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86_64);
717 };
718 
719 // Slow path generating a read barrier for a heap reference.
720 class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
721  public:
722   ReadBarrierForHeapReferenceSlowPathX86_64(HInstruction* instruction,
723                                             Location out,
724                                             Location ref,
725                                             Location obj,
726                                             uint32_t offset,
727                                             Location index)
728       : SlowPathCode(instruction),
729         out_(out),
730         ref_(ref),
731         obj_(obj),
732         offset_(offset),
733         index_(index) {
734     DCHECK(kEmitCompilerReadBarrier);
735     // If `obj` is equal to `out` or `ref`, it means the initial
736     // object has been overwritten by (or after) the heap object
737     // reference load to be instrumented, e.g.:
738     //
739     //   __ movl(out, Address(out, offset));
740     //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
741     //
742     // In that case, we have lost the information about the original
743     // object, and the emitted read barrier cannot work properly.
744     DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
745     DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
746   }
747 
748   void EmitNativeCode(CodeGenerator* codegen) override {
749     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
750     LocationSummary* locations = instruction_->GetLocations();
751     CpuRegister reg_out = out_.AsRegister<CpuRegister>();
752     DCHECK(locations->CanCall());
753     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_;
754     DCHECK(instruction_->IsInstanceFieldGet() ||
755            instruction_->IsStaticFieldGet() ||
756            instruction_->IsArrayGet() ||
757            instruction_->IsInstanceOf() ||
758            instruction_->IsCheckCast() ||
759            (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
760         << "Unexpected instruction in read barrier for heap reference slow path: "
761         << instruction_->DebugName();
762 
763     __ Bind(GetEntryLabel());
764     SaveLiveRegisters(codegen, locations);
765 
766     // We may have to change the index's value, but as `index_` is a
767     // constant member (like other "inputs" of this slow path),
768     // introduce a copy of it, `index`.
769     Location index = index_;
770     if (index_.IsValid()) {
771       // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
772       if (instruction_->IsArrayGet()) {
773         // Compute the real offset and store it in the register used for `index`.
774         Register index_reg = index_.AsRegister<CpuRegister>().AsRegister();
775         DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
776         if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
777           // We are about to change the value of `index_reg` (see the
778           // calls to art::x86_64::X86_64Assembler::shll and
779           // art::x86_64::X86_64Assembler::AddImmediate below), but it
780           // has not been saved by the previous call to
781           // art::SlowPathCode::SaveLiveRegisters, as it is a
782           // callee-save register --
783           // art::SlowPathCode::SaveLiveRegisters does not consider
784           // callee-save registers, as it has been designed with the
785           // assumption that callee-save registers are supposed to be
786           // handled by the called function.  So, as a callee-save
787           // register, `index_reg` _would_ eventually be saved onto
788           // the stack, but it would be too late: we would have
789           // changed its value earlier.  Therefore, we manually save
790           // it here into another freely available register,
791           // `free_reg`, chosen of course among the caller-save
792           // registers (as a callee-save `free_reg` register would
793           // exhibit the same problem).
794           //
795           // Note we could have requested a temporary register from
796           // the register allocator instead; but we prefer not to, as
797           // this is a slow path, and we know we can find a
798           // caller-save register that is available.
799           Register free_reg = FindAvailableCallerSaveRegister(codegen).AsRegister();
800           __ movl(CpuRegister(free_reg), CpuRegister(index_reg));
801           index_reg = free_reg;
802           index = Location::RegisterLocation(index_reg);
803         } else {
804           // The initial register stored in `index_` has already been
805           // saved in the call to art::SlowPathCode::SaveLiveRegisters
806           // (as it is not a callee-save register), so we can freely
807           // use it.
808         }
809         // Shifting the index value contained in `index_reg` by the
810         // scale factor (2) cannot overflow in practice, as the
811         // runtime is unable to allocate object arrays with a size
812         // larger than 2^26 - 1 (that is, 2^28 - 4 bytes).
813         __ shll(CpuRegister(index_reg), Immediate(TIMES_4));
814         static_assert(
815             sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
816             "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
817         __ AddImmediate(CpuRegister(index_reg), Immediate(offset_));
818       } else {
819         // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
820         // intrinsics, `index_` is not shifted by a scale factor of 2
821         // (as in the case of ArrayGet), as it is actually an offset
822         // to an object field within an object.
823         DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
824         DCHECK(instruction_->GetLocations()->Intrinsified());
825         DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
826                (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
827             << instruction_->AsInvoke()->GetIntrinsic();
828         DCHECK_EQ(offset_, 0U);
829         DCHECK(index_.IsRegister());
830       }
831     }
832 
833     // We're moving two or three locations to locations that could
834     // overlap, so we need a parallel move resolver.
835     InvokeRuntimeCallingConvention calling_convention;
836     HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
837     parallel_move.AddMove(ref_,
838                           Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
839                           DataType::Type::kReference,
840                           nullptr);
841     parallel_move.AddMove(obj_,
842                           Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
843                           DataType::Type::kReference,
844                           nullptr);
845     if (index.IsValid()) {
846       parallel_move.AddMove(index,
847                             Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
848                             DataType::Type::kInt32,
849                             nullptr);
850       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
851     } else {
852       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
853       __ movl(CpuRegister(calling_convention.GetRegisterAt(2)), Immediate(offset_));
854     }
855     x86_64_codegen->InvokeRuntime(kQuickReadBarrierSlow,
856                                   instruction_,
857                                   instruction_->GetDexPc(),
858                                   this);
859     CheckEntrypointTypes<
860         kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
861     x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
862 
863     RestoreLiveRegisters(codegen, locations);
864     __ jmp(GetExitLabel());
865   }
866 
867   const char* GetDescription() const override {
868     return "ReadBarrierForHeapReferenceSlowPathX86_64";
869   }
870 
871  private:
872   CpuRegister FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
873     size_t ref = static_cast<int>(ref_.AsRegister<CpuRegister>().AsRegister());
874     size_t obj = static_cast<int>(obj_.AsRegister<CpuRegister>().AsRegister());
875     for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
876       if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
877         return static_cast<CpuRegister>(i);
878       }
879     }
880     // We shall never fail to find a free caller-save register, as
881     // there are more than two core caller-save registers on x86-64
882     // (meaning it is possible to find one which is different from
883     // `ref` and `obj`).
884     DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
885     LOG(FATAL) << "Could not find a free caller-save register";
886     UNREACHABLE();
887   }
888 
889   const Location out_;
890   const Location ref_;
891   const Location obj_;
892   const uint32_t offset_;
893   // An additional location containing an index to an array.
894   // Only used for HArrayGet and the UnsafeGetObject &
895   // UnsafeGetObjectVolatile intrinsics.
896   const Location index_;
897 
898   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86_64);
899 };
900 
901 // Slow path generating a read barrier for a GC root.
902 class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode {
903  public:
904   ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root)
905       : SlowPathCode(instruction), out_(out), root_(root) {
906     DCHECK(kEmitCompilerReadBarrier);
907   }
908 
909   void EmitNativeCode(CodeGenerator* codegen) override {
910     LocationSummary* locations = instruction_->GetLocations();
911     DCHECK(locations->CanCall());
912     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
913     DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
914         << "Unexpected instruction in read barrier for GC root slow path: "
915         << instruction_->DebugName();
916 
917     __ Bind(GetEntryLabel());
918     SaveLiveRegisters(codegen, locations);
919 
920     InvokeRuntimeCallingConvention calling_convention;
921     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
922     x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
923     x86_64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
924                                   instruction_,
925                                   instruction_->GetDexPc(),
926                                   this);
927     CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
928     x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
929 
930     RestoreLiveRegisters(codegen, locations);
931     __ jmp(GetExitLabel());
932   }
933 
934   const char* GetDescription() const override { return "ReadBarrierForRootSlowPathX86_64"; }
935 
936  private:
937   const Location out_;
938   const Location root_;
939 
940   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86_64);
941 };
942 
943 #undef __
944 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
945 #define __ down_cast<X86_64Assembler*>(GetAssembler())->  // NOLINT
946 
947 inline Condition X86_64IntegerCondition(IfCondition cond) {
948   switch (cond) {
949     case kCondEQ: return kEqual;
950     case kCondNE: return kNotEqual;
951     case kCondLT: return kLess;
952     case kCondLE: return kLessEqual;
953     case kCondGT: return kGreater;
954     case kCondGE: return kGreaterEqual;
955     case kCondB:  return kBelow;
956     case kCondBE: return kBelowEqual;
957     case kCondA:  return kAbove;
958     case kCondAE: return kAboveEqual;
959   }
960   LOG(FATAL) << "Unreachable";
961   UNREACHABLE();
962 }
963 
964 // Maps FP condition to x86_64 name.
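// kCondLT/LE/GT/GE map to the unsigned flavors (below/above) because ucomiss/ucomisd set
// the flags the way an unsigned comparison would; an unordered (NaN) result is signaled
// through the parity flag and handled separately by the callers.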
965 inline Condition X86_64FPCondition(IfCondition cond) {
966   switch (cond) {
967     case kCondEQ: return kEqual;
968     case kCondNE: return kNotEqual;
969     case kCondLT: return kBelow;
970     case kCondLE: return kBelowEqual;
971     case kCondGT: return kAbove;
972     case kCondGE: return kAboveEqual;
973     default:      break;  // should not happen
974   }
975   LOG(FATAL) << "Unreachable";
976   UNREACHABLE();
977 }
978 
979 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStaticOrDirectDispatch(
980       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
981       ArtMethod* method ATTRIBUTE_UNUSED) {
982   return desired_dispatch_info;
983 }
984 
985 void CodeGeneratorX86_64::GenerateStaticOrDirectCall(
986     HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
987   // All registers are assumed to be correctly set up.
988 
989   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
990   switch (invoke->GetMethodLoadKind()) {
991     case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
992       // temp = thread->string_init_entrypoint
993       uint32_t offset =
994           GetThreadOffset<kX86_64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
995       __ gs()->movq(temp.AsRegister<CpuRegister>(), Address::Absolute(offset, /* no_rip= */ true));
996       break;
997     }
998     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
999       callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
1000       break;
1001     case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative:
1002       DCHECK(GetCompilerOptions().IsBootImage());
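      // kDummy32BitOffset is only a placeholder: the label bound by
      // RecordBootImageMethodPatch marks this instruction so that the linker can later
      // rewrite its last 4 bytes with the real PC-relative offset.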
1003       __ leal(temp.AsRegister<CpuRegister>(),
1004               Address::Absolute(kDummy32BitOffset, /* no_rip= */ false));
1005       RecordBootImageMethodPatch(invoke);
1006       break;
1007     case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: {
1008       // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load.
1009       __ movl(temp.AsRegister<CpuRegister>(),
1010               Address::Absolute(kDummy32BitOffset, /* no_rip= */ false));
1011       RecordBootImageRelRoPatch(GetBootImageOffset(invoke));
1012       break;
1013     }
1014     case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
1015       __ movq(temp.AsRegister<CpuRegister>(),
1016               Address::Absolute(kDummy32BitOffset, /* no_rip= */ false));
1017       RecordMethodBssEntryPatch(invoke);
1018       break;
1019     }
1020     case HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress:
1021       Load64BitValue(temp.AsRegister<CpuRegister>(), invoke->GetMethodAddress());
1022       break;
1023     case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: {
1024       GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
1025       return;  // No code pointer retrieval; the runtime performs the call directly.
1026     }
1027   }
1028 
1029   switch (invoke->GetCodePtrLocation()) {
1030     case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
1031       __ call(&frame_entry_label_);
1032       break;
1033     case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
1034       // (callee_method + offset_of_quick_compiled_code)()
1035       __ call(Address(callee_method.AsRegister<CpuRegister>(),
1036                       ArtMethod::EntryPointFromQuickCompiledCodeOffset(
1037                           kX86_64PointerSize).SizeValue()));
1038       break;
1039   }
1040   RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
1041 
1042   DCHECK(!IsLeafMethod());
1043 }
1044 
1045 void CodeGeneratorX86_64::GenerateVirtualCall(
1046     HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
1047   CpuRegister temp = temp_in.AsRegister<CpuRegister>();
1048   size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
1049       invoke->GetVTableIndex(), kX86_64PointerSize).SizeValue();
1050 
1051   // Use the calling convention instead of the location of the receiver, as
1052   // intrinsics may have put the receiver in a different register. In the intrinsics
1053   // slow path, the arguments have been moved to the right place, so here we are
1054   // guaranteed that the receiver is the first register of the calling convention.
1055   InvokeDexCallingConvention calling_convention;
1056   Register receiver = calling_convention.GetRegisterAt(0);
1057 
1058   size_t class_offset = mirror::Object::ClassOffset().SizeValue();
1059   // /* HeapReference<Class> */ temp = receiver->klass_
1060   __ movl(temp, Address(CpuRegister(receiver), class_offset));
1061   MaybeRecordImplicitNullCheck(invoke);
1062   // Instead of simply (possibly) unpoisoning `temp` here, we should
1063   // emit a read barrier for the previous class reference load.
1064   // However this is not required in practice, as this is an
1065   // intermediate/temporary reference and because the current
1066   // concurrent copying collector keeps the from-space memory
1067   // intact/accessible until the end of the marking phase (the
1068   // concurrent copying collector may not do so in the future).
1069   __ MaybeUnpoisonHeapReference(temp);
1070   // temp = temp->GetMethodAt(method_offset);
1071   __ movq(temp, Address(temp, method_offset));
1072   // call temp->GetEntryPoint();
1073   __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(
1074       kX86_64PointerSize).SizeValue()));
1075   RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
1076 }
1077 
1078 void CodeGeneratorX86_64::RecordBootImageIntrinsicPatch(uint32_t intrinsic_data) {
1079   boot_image_intrinsic_patches_.emplace_back(/* target_dex_file= */ nullptr, intrinsic_data);
1080   __ Bind(&boot_image_intrinsic_patches_.back().label);
1081 }
1082 
1083 void CodeGeneratorX86_64::RecordBootImageRelRoPatch(uint32_t boot_image_offset) {
1084   boot_image_method_patches_.emplace_back(/* target_dex_file= */ nullptr, boot_image_offset);
1085   __ Bind(&boot_image_method_patches_.back().label);
1086 }
1087 
1088 void CodeGeneratorX86_64::RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke) {
1089   boot_image_method_patches_.emplace_back(
1090       invoke->GetTargetMethod().dex_file, invoke->GetTargetMethod().index);
1091   __ Bind(&boot_image_method_patches_.back().label);
1092 }
1093 
1094 void CodeGeneratorX86_64::RecordMethodBssEntryPatch(HInvokeStaticOrDirect* invoke) {
1095   method_bss_entry_patches_.emplace_back(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex());
1096   __ Bind(&method_bss_entry_patches_.back().label);
1097 }
1098 
1099 void CodeGeneratorX86_64::RecordBootImageTypePatch(HLoadClass* load_class) {
1100   boot_image_type_patches_.emplace_back(
1101       &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
1102   __ Bind(&boot_image_type_patches_.back().label);
1103 }
1104 
1105 Label* CodeGeneratorX86_64::NewTypeBssEntryPatch(HLoadClass* load_class) {
1106   type_bss_entry_patches_.emplace_back(
1107       &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
1108   return &type_bss_entry_patches_.back().label;
1109 }
1110 
1111 void CodeGeneratorX86_64::RecordBootImageStringPatch(HLoadString* load_string) {
1112   boot_image_string_patches_.emplace_back(
1113       &load_string->GetDexFile(), load_string->GetStringIndex().index_);
1114   __ Bind(&boot_image_string_patches_.back().label);
1115 }
1116 
1117 Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) {
1118   string_bss_entry_patches_.emplace_back(
1119       &load_string->GetDexFile(), load_string->GetStringIndex().index_);
1120   return &string_bss_entry_patches_.back().label;
1121 }
1122 
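// Materializes a boot image address in `reg`. Boot image compilation emits a pc-relative
// leal patched at link time, PIC (AOT app) compilation loads the address from a boot image
// .data.bimg.rel.ro entry, and JIT compilation embeds the 32-bit address directly, since
// the boot image is mapped in the low 4GiB.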
1123 void CodeGeneratorX86_64::LoadBootImageAddress(CpuRegister reg, uint32_t boot_image_reference) {
1124   if (GetCompilerOptions().IsBootImage()) {
1125     __ leal(reg, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false));
1126     RecordBootImageIntrinsicPatch(boot_image_reference);
1127   } else if (GetCompilerOptions().GetCompilePic()) {
1128     __ movl(reg, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false));
1129     RecordBootImageRelRoPatch(boot_image_reference);
1130   } else {
1131     DCHECK(Runtime::Current()->UseJitCompilation());
1132     gc::Heap* heap = Runtime::Current()->GetHeap();
1133     DCHECK(!heap->GetBootImageSpaces().empty());
1134     const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference;
1135     __ movl(reg, Immediate(dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(address))));
1136   }
1137 }
1138 
1139 void CodeGeneratorX86_64::AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke,
1140                                                        uint32_t boot_image_offset) {
1141   DCHECK(invoke->IsStatic());
1142   InvokeRuntimeCallingConvention calling_convention;
1143   CpuRegister argument = CpuRegister(calling_convention.GetRegisterAt(0));
1144   if (GetCompilerOptions().IsBootImage()) {
1145     DCHECK_EQ(boot_image_offset, IntrinsicVisitor::IntegerValueOfInfo::kInvalidReference);
1146     // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
1147     __ leal(argument,
1148             Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false));
1149     MethodReference target_method = invoke->GetTargetMethod();
1150     dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
1151     boot_image_type_patches_.emplace_back(target_method.dex_file, type_idx.index_);
1152     __ Bind(&boot_image_type_patches_.back().label);
1153   } else {
1154     LoadBootImageAddress(argument, boot_image_offset);
1155   }
1156   InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
1157   CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
1158 }
1159 
1160 // The label points to the end of the "movl" (or similar) instruction, but the literal offset
1161 // for the patch needs to point to the embedded constant, which occupies the last 4 bytes.
1162 constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
1163 
1164 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
1165 inline void CodeGeneratorX86_64::EmitPcRelativeLinkerPatches(
1166     const ArenaDeque<PatchInfo<Label>>& infos,
1167     ArenaVector<linker::LinkerPatch>* linker_patches) {
1168   for (const PatchInfo<Label>& info : infos) {
1169     uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
1170     linker_patches->push_back(
1171         Factory(literal_offset, info.target_dex_file, info.label.Position(), info.offset_or_index));
1172   }
1173 }
1174 
1175 template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
1176 linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
1177                                      const DexFile* target_dex_file,
1178                                      uint32_t pc_insn_offset,
1179                                      uint32_t boot_image_offset) {
1180   DCHECK(target_dex_file == nullptr);  // Unused for these patches, should be null.
1181   return Factory(literal_offset, pc_insn_offset, boot_image_offset);
1182 }
1183 
1184 void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
1185   DCHECK(linker_patches->empty());
1186   size_t size =
1187       boot_image_method_patches_.size() +
1188       method_bss_entry_patches_.size() +
1189       boot_image_type_patches_.size() +
1190       type_bss_entry_patches_.size() +
1191       boot_image_string_patches_.size() +
1192       string_bss_entry_patches_.size() +
1193       boot_image_intrinsic_patches_.size();
1194   linker_patches->reserve(size);
1195   if (GetCompilerOptions().IsBootImage()) {
1196     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
1197         boot_image_method_patches_, linker_patches);
1198     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
1199         boot_image_type_patches_, linker_patches);
1200     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
1201         boot_image_string_patches_, linker_patches);
1202     EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
1203         boot_image_intrinsic_patches_, linker_patches);
1204   } else {
1205     EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>(
1206         boot_image_method_patches_, linker_patches);
1207     DCHECK(boot_image_type_patches_.empty());
1208     DCHECK(boot_image_string_patches_.empty());
1209     DCHECK(boot_image_intrinsic_patches_.empty());
1210   }
1211   EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
1212       method_bss_entry_patches_, linker_patches);
1213   EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
1214       type_bss_entry_patches_, linker_patches);
1215   EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
1216       string_bss_entry_patches_, linker_patches);
1217   DCHECK_EQ(size, linker_patches->size());
1218 }
1219 
1220 void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const {
1221   stream << Register(reg);
1222 }
1223 
1224 void CodeGeneratorX86_64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
1225   stream << FloatRegister(reg);
1226 }
1227 
1228 const X86_64InstructionSetFeatures& CodeGeneratorX86_64::GetInstructionSetFeatures() const {
1229   return *GetCompilerOptions().GetInstructionSetFeatures()->AsX86_64InstructionSetFeatures();
1230 }
1231 
1232 size_t CodeGeneratorX86_64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
1233   __ movq(Address(CpuRegister(RSP), stack_index), CpuRegister(reg_id));
1234   return kX86_64WordSize;
1235 }
1236 
1237 size_t CodeGeneratorX86_64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
1238   __ movq(CpuRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1239   return kX86_64WordSize;
1240 }
1241 
1242 size_t CodeGeneratorX86_64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1243   if (GetGraph()->HasSIMD()) {
1244     __ movups(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
1245   } else {
1246     __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
1247   }
1248   return GetFloatingPointSpillSlotSize();
1249 }
1250 
1251 size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1252   if (GetGraph()->HasSIMD()) {
1253     __ movups(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1254   } else {
1255     __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1256   }
1257   return GetFloatingPointSpillSlotSize();
1258 }
1259 
1260 void CodeGeneratorX86_64::InvokeRuntime(QuickEntrypointEnum entrypoint,
1261                                         HInstruction* instruction,
1262                                         uint32_t dex_pc,
1263                                         SlowPathCode* slow_path) {
1264   ValidateInvokeRuntime(entrypoint, instruction, slow_path);
1265   GenerateInvokeRuntime(GetThreadOffset<kX86_64PointerSize>(entrypoint).Int32Value());
1266   if (EntrypointRequiresStackMap(entrypoint)) {
1267     RecordPcInfo(instruction, dex_pc, slow_path);
1268   }
1269 }
1270 
1271 void CodeGeneratorX86_64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
1272                                                               HInstruction* instruction,
1273                                                               SlowPathCode* slow_path) {
1274   ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
1275   GenerateInvokeRuntime(entry_point_offset);
1276 }
1277 
1278 void CodeGeneratorX86_64::GenerateInvokeRuntime(int32_t entry_point_offset) {
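  // On x86-64 the Thread* is reachable through the GS segment, so a gs:-relative absolute
  // address reads the entrypoint from the current Thread.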
1279   __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip= */ true));
1280 }
1281 
1282 static constexpr int kNumberOfCpuRegisterPairs = 0;
1283 // Use a fake return address register to mimic Quick.
1284 static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1);
1285 CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
1286                                          const CompilerOptions& compiler_options,
1287                                          OptimizingCompilerStats* stats)
1288       : CodeGenerator(graph,
1289                       kNumberOfCpuRegisters,
1290                       kNumberOfFloatRegisters,
1291                       kNumberOfCpuRegisterPairs,
1292                       ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
1293                                           arraysize(kCoreCalleeSaves))
1294                           | (1 << kFakeReturnRegister),
1295                       ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves),
1296                                           arraysize(kFpuCalleeSaves)),
1297                       compiler_options,
1298                       stats),
1299         block_labels_(nullptr),
1300         location_builder_(graph, this),
1301         instruction_visitor_(graph, this),
1302         move_resolver_(graph->GetAllocator(), this),
1303         assembler_(graph->GetAllocator()),
1304         constant_area_start_(0),
1305         boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1306         method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1307         boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1308         type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1309         boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1310         string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1311         boot_image_intrinsic_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1312         jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1313         jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1314         fixups_to_jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
1315   AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
1316 }
1317 
1318 InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph,
1319                                                                CodeGeneratorX86_64* codegen)
1320       : InstructionCodeGenerator(graph, codegen),
1321         assembler_(codegen->GetAssembler()),
1322         codegen_(codegen) {}
1323 
1324 void CodeGeneratorX86_64::SetupBlockedRegisters() const {
1325   // Stack register is always reserved.
1326   blocked_core_registers_[RSP] = true;
1327 
1328   // Block the register used as TMP.
1329   blocked_core_registers_[TMP] = true;
1330 }
1331 
1332 static dwarf::Reg DWARFReg(Register reg) {
1333   return dwarf::Reg::X86_64Core(static_cast<int>(reg));
1334 }
1335 
1336 static dwarf::Reg DWARFReg(FloatRegister reg) {
1337   return dwarf::Reg::X86_64Fp(static_cast<int>(reg));
1338 }
1339 
1340 void CodeGeneratorX86_64::GenerateFrameEntry() {
1341   __ cfi().SetCurrentCFAOffset(kX86_64WordSize);  // return address
1342   __ Bind(&frame_entry_label_);
1343   bool skip_overflow_check = IsLeafMethod()
1344       && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
1345   DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
1346 
1347   if (GetCompilerOptions().CountHotnessInCompiledCode()) {
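    // Increment the method's hotness counter; the ArtMethod* is still in the method
    // register argument at this point.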
1348     __ addw(Address(CpuRegister(kMethodRegisterArgument),
1349                     ArtMethod::HotnessCountOffset().Int32Value()),
1350             Immediate(1));
1351   }
1352 
1353   if (!skip_overflow_check) {
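    // Implicit stack overflow check: probe the address `reserved_bytes` below RSP. If the
    // stack has overflowed, the load faults and the fault handler throws StackOverflowError.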
1354     size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86_64);
1355     __ testq(CpuRegister(RAX), Address(CpuRegister(RSP), -static_cast<int32_t>(reserved_bytes)));
1356     RecordPcInfo(nullptr, 0);
1357   }
1358 
1359   if (HasEmptyFrame()) {
1360     return;
1361   }
1362 
1363   for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
1364     Register reg = kCoreCalleeSaves[i];
1365     if (allocated_registers_.ContainsCoreRegister(reg)) {
1366       __ pushq(CpuRegister(reg));
1367       __ cfi().AdjustCFAOffset(kX86_64WordSize);
1368       __ cfi().RelOffset(DWARFReg(reg), 0);
1369     }
1370   }
1371 
1372   int adjust = GetFrameSize() - GetCoreSpillSize();
1373   __ subq(CpuRegister(RSP), Immediate(adjust));
1374   __ cfi().AdjustCFAOffset(adjust);
1375   uint32_t xmm_spill_location = GetFpuSpillStart();
1376   size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize();
1377 
1378   for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
1379     if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
1380       int offset = xmm_spill_location + (xmm_spill_slot_size * i);
1381       __ movsd(Address(CpuRegister(RSP), offset), XmmRegister(kFpuCalleeSaves[i]));
1382       __ cfi().RelOffset(DWARFReg(kFpuCalleeSaves[i]), offset);
1383     }
1384   }
1385 
1386   // Save the current method if we need it. Note that we do not
1387   // do this when generating code for HCurrentMethod, as that instruction
1388   // might have been removed from the SSA graph.
1389   if (RequiresCurrentMethod()) {
1390     __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset),
1391             CpuRegister(kMethodRegisterArgument));
1392   }
1393 
1394   if (GetGraph()->HasShouldDeoptimizeFlag()) {
1395     // Initialize should_deoptimize flag to 0.
1396     __ movl(Address(CpuRegister(RSP), GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
1397   }
1398 }
1399 
1400 void CodeGeneratorX86_64::GenerateFrameExit() {
1401   __ cfi().RememberState();
1402   if (!HasEmptyFrame()) {
1403     uint32_t xmm_spill_location = GetFpuSpillStart();
1404     size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize();
1405     for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
1406       if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
1407         int offset = xmm_spill_location + (xmm_spill_slot_size * i);
1408         __ movsd(XmmRegister(kFpuCalleeSaves[i]), Address(CpuRegister(RSP), offset));
1409         __ cfi().Restore(DWARFReg(kFpuCalleeSaves[i]));
1410       }
1411     }
1412 
1413     int adjust = GetFrameSize() - GetCoreSpillSize();
1414     __ addq(CpuRegister(RSP), Immediate(adjust));
1415     __ cfi().AdjustCFAOffset(-adjust);
1416 
1417     for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
1418       Register reg = kCoreCalleeSaves[i];
1419       if (allocated_registers_.ContainsCoreRegister(reg)) {
1420         __ popq(CpuRegister(reg));
1421         __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize));
1422         __ cfi().Restore(DWARFReg(reg));
1423       }
1424     }
1425   }
1426   __ ret();
1427   __ cfi().RestoreState();
1428   __ cfi().DefCFAOffset(GetFrameSize());
1429 }
1430 
1431 void CodeGeneratorX86_64::Bind(HBasicBlock* block) {
1432   __ Bind(GetLabelOf(block));
1433 }
1434 
1435 void CodeGeneratorX86_64::Move(Location destination, Location source) {
1436   if (source.Equals(destination)) {
1437     return;
1438   }
1439   if (destination.IsRegister()) {
1440     CpuRegister dest = destination.AsRegister<CpuRegister>();
1441     if (source.IsRegister()) {
1442       __ movq(dest, source.AsRegister<CpuRegister>());
1443     } else if (source.IsFpuRegister()) {
1444       __ movd(dest, source.AsFpuRegister<XmmRegister>());
1445     } else if (source.IsStackSlot()) {
1446       __ movl(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1447     } else if (source.IsConstant()) {
1448       HConstant* constant = source.GetConstant();
1449       if (constant->IsLongConstant()) {
1450         Load64BitValue(dest, constant->AsLongConstant()->GetValue());
1451       } else {
1452         Load32BitValue(dest, GetInt32ValueOf(constant));
1453       }
1454     } else {
1455       DCHECK(source.IsDoubleStackSlot());
1456       __ movq(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1457     }
1458   } else if (destination.IsFpuRegister()) {
1459     XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
1460     if (source.IsRegister()) {
1461       __ movd(dest, source.AsRegister<CpuRegister>());
1462     } else if (source.IsFpuRegister()) {
1463       __ movaps(dest, source.AsFpuRegister<XmmRegister>());
1464     } else if (source.IsConstant()) {
1465       HConstant* constant = source.GetConstant();
1466       int64_t value = CodeGenerator::GetInt64ValueOf(constant);
1467       if (constant->IsFloatConstant()) {
1468         Load32BitValue(dest, static_cast<int32_t>(value));
1469       } else {
1470         Load64BitValue(dest, value);
1471       }
1472     } else if (source.IsStackSlot()) {
1473       __ movss(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1474     } else {
1475       DCHECK(source.IsDoubleStackSlot());
1476       __ movsd(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1477     }
1478   } else if (destination.IsStackSlot()) {
1479     if (source.IsRegister()) {
1480       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
1481               source.AsRegister<CpuRegister>());
1482     } else if (source.IsFpuRegister()) {
1483       __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
1484                source.AsFpuRegister<XmmRegister>());
1485     } else if (source.IsConstant()) {
1486       HConstant* constant = source.GetConstant();
1487       int32_t value = GetInt32ValueOf(constant);
1488       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
1489     } else {
1490       DCHECK(source.IsStackSlot()) << source;
1491       __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
1492       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
1493     }
1494   } else {
1495     DCHECK(destination.IsDoubleStackSlot());
1496     if (source.IsRegister()) {
1497       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
1498               source.AsRegister<CpuRegister>());
1499     } else if (source.IsFpuRegister()) {
1500       __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
1501                source.AsFpuRegister<XmmRegister>());
1502     } else if (source.IsConstant()) {
1503       HConstant* constant = source.GetConstant();
1504       DCHECK(constant->IsLongConstant() || constant->IsDoubleConstant());
1505       int64_t value = GetInt64ValueOf(constant);
1506       Store64BitValueToStack(destination, value);
1507     } else {
1508       DCHECK(source.IsDoubleStackSlot());
1509       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
1510       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
1511     }
1512   }
1513 }
1514 
1515 void CodeGeneratorX86_64::MoveConstant(Location location, int32_t value) {
1516   DCHECK(location.IsRegister());
1517   Load64BitValue(location.AsRegister<CpuRegister>(), static_cast<int64_t>(value));
1518 }
1519 
1520 void CodeGeneratorX86_64::MoveLocation(
1521     Location dst, Location src, DataType::Type dst_type ATTRIBUTE_UNUSED) {
1522   Move(dst, src);
1523 }
1524 
1525 void CodeGeneratorX86_64::AddLocationAsTemp(Location location, LocationSummary* locations) {
1526   if (location.IsRegister()) {
1527     locations->AddTemp(location);
1528   } else {
1529     UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
1530   }
1531 }
1532 
1533 void InstructionCodeGeneratorX86_64::HandleGoto(HInstruction* got, HBasicBlock* successor) {
1534   if (successor->IsExitBlock()) {
1535     DCHECK(got->GetPrevious()->AlwaysThrows());
1536     return;  // no code needed
1537   }
1538 
1539   HBasicBlock* block = got->GetBlock();
1540   HInstruction* previous = got->GetPrevious();
1541 
1542   HLoopInformation* info = block->GetLoopInformation();
1543   if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
1544     if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) {
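      // Reload the current ArtMethod* from the frame and bump its hotness counter on
      // the back edge.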
1545       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), 0));
1546       __ addw(Address(CpuRegister(TMP), ArtMethod::HotnessCountOffset().Int32Value()),
1547               Immediate(1));
1548     }
1549     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
1550     return;
1551   }
1552 
1553   if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
1554     GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
1555   }
1556   if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
1557     __ jmp(codegen_->GetLabelOf(successor));
1558   }
1559 }
1560 
1561 void LocationsBuilderX86_64::VisitGoto(HGoto* got) {
1562   got->SetLocations(nullptr);
1563 }
1564 
1565 void InstructionCodeGeneratorX86_64::VisitGoto(HGoto* got) {
1566   HandleGoto(got, got->GetSuccessor());
1567 }
1568 
1569 void LocationsBuilderX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
1570   try_boundary->SetLocations(nullptr);
1571 }
1572 
1573 void InstructionCodeGeneratorX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
1574   HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
1575   if (!successor->IsExitBlock()) {
1576     HandleGoto(try_boundary, successor);
1577   }
1578 }
1579 
1580 void LocationsBuilderX86_64::VisitExit(HExit* exit) {
1581   exit->SetLocations(nullptr);
1582 }
1583 
1584 void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
1585 }
1586 
1587 template<class LabelType>
1588 void InstructionCodeGeneratorX86_64::GenerateFPJumps(HCondition* cond,
1589                                                      LabelType* true_label,
1590                                                      LabelType* false_label) {
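  // The flags were set by a ucomiss/ucomisd in the caller; an unordered result (a NaN
  // operand) is reported separately, so branch for the NaN case first according to the
  // condition's bias, then test the ordered condition.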
1591   if (cond->IsFPConditionTrueIfNaN()) {
1592     __ j(kUnordered, true_label);
1593   } else if (cond->IsFPConditionFalseIfNaN()) {
1594     __ j(kUnordered, false_label);
1595   }
1596   __ j(X86_64FPCondition(cond->GetCondition()), true_label);
1597 }
1598 
1599 void InstructionCodeGeneratorX86_64::GenerateCompareTest(HCondition* condition) {
1600   LocationSummary* locations = condition->GetLocations();
1601 
1602   Location left = locations->InAt(0);
1603   Location right = locations->InAt(1);
1604   DataType::Type type = condition->InputAt(0)->GetType();
1605   switch (type) {
1606     case DataType::Type::kBool:
1607     case DataType::Type::kUint8:
1608     case DataType::Type::kInt8:
1609     case DataType::Type::kUint16:
1610     case DataType::Type::kInt16:
1611     case DataType::Type::kInt32:
1612     case DataType::Type::kReference: {
1613       codegen_->GenerateIntCompare(left, right);
1614       break;
1615     }
1616     case DataType::Type::kInt64: {
1617       codegen_->GenerateLongCompare(left, right);
1618       break;
1619     }
1620     case DataType::Type::kFloat32: {
1621       if (right.IsFpuRegister()) {
1622         __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
1623       } else if (right.IsConstant()) {
1624         __ ucomiss(left.AsFpuRegister<XmmRegister>(),
1625                    codegen_->LiteralFloatAddress(
1626                      right.GetConstant()->AsFloatConstant()->GetValue()));
1627       } else {
1628         DCHECK(right.IsStackSlot());
1629         __ ucomiss(left.AsFpuRegister<XmmRegister>(),
1630                    Address(CpuRegister(RSP), right.GetStackIndex()));
1631       }
1632       break;
1633     }
1634     case DataType::Type::kFloat64: {
1635       if (right.IsFpuRegister()) {
1636         __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
1637       } else if (right.IsConstant()) {
1638         __ ucomisd(left.AsFpuRegister<XmmRegister>(),
1639                    codegen_->LiteralDoubleAddress(
1640                      right.GetConstant()->AsDoubleConstant()->GetValue()));
1641       } else {
1642         DCHECK(right.IsDoubleStackSlot());
1643         __ ucomisd(left.AsFpuRegister<XmmRegister>(),
1644                    Address(CpuRegister(RSP), right.GetStackIndex()));
1645       }
1646       break;
1647     }
1648     default:
1649       LOG(FATAL) << "Unexpected condition type " << type;
1650   }
1651 }
1652 
1653 template<class LabelType>
1654 void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* condition,
1655                                                                   LabelType* true_target_in,
1656                                                                   LabelType* false_target_in) {
1657   // Generated branching requires both targets to be explicit. If either of the
1658   // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead.
1659   LabelType fallthrough_target;
1660   LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
1661   LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
1662 
1663   // Generate the comparison to set the CC.
1664   GenerateCompareTest(condition);
1665 
1666   // Now generate the correct jump(s).
1667   DataType::Type type = condition->InputAt(0)->GetType();
1668   switch (type) {
1669     case DataType::Type::kInt64: {
1670       __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
1671       break;
1672     }
1673     case DataType::Type::kFloat32: {
1674       GenerateFPJumps(condition, true_target, false_target);
1675       break;
1676     }
1677     case DataType::Type::kFloat64: {
1678       GenerateFPJumps(condition, true_target, false_target);
1679       break;
1680     }
1681     default:
1682       LOG(FATAL) << "Unexpected condition type " << type;
1683   }
1684 
1685   if (false_target != &fallthrough_target) {
1686     __ jmp(false_target);
1687   }
1688 
1689   if (fallthrough_target.IsLinked()) {
1690     __ Bind(&fallthrough_target);
1691   }
1692 }
1693 
1694 static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) {
1695   // Moves may affect the eflags register (move zero uses xorl), so the eflags from
1696   // `cond` are only valid if `cond` comes immediately before `branch`. We also cannot
1697   // reuse the eflags for FP conditions, as they do not map to a single condition code.
1698   return cond->IsCondition() &&
1699          cond->GetNext() == branch &&
1700          !DataType::IsFloatingPointType(cond->InputAt(0)->GetType());
1701 }
1702 
1703 template<class LabelType>
1704 void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruction,
1705                                                            size_t condition_input_index,
1706                                                            LabelType* true_target,
1707                                                            LabelType* false_target) {
1708   HInstruction* cond = instruction->InputAt(condition_input_index);
1709 
1710   if (true_target == nullptr && false_target == nullptr) {
1711     // Nothing to do. The code always falls through.
1712     return;
1713   } else if (cond->IsIntConstant()) {
1714     // Constant condition, statically compared against "true" (integer value 1).
1715     if (cond->AsIntConstant()->IsTrue()) {
1716       if (true_target != nullptr) {
1717         __ jmp(true_target);
1718       }
1719     } else {
1720       DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
1721       if (false_target != nullptr) {
1722         __ jmp(false_target);
1723       }
1724     }
1725     return;
1726   }
1727 
1728   // The following code generates these patterns:
1729   //  (1) true_target == nullptr && false_target != nullptr
1730   //        - opposite condition true => branch to false_target
1731   //  (2) true_target != nullptr && false_target == nullptr
1732   //        - condition true => branch to true_target
1733   //  (3) true_target != nullptr && false_target != nullptr
1734   //        - condition true => branch to true_target
1735   //        - branch to false_target
1736   if (IsBooleanValueOrMaterializedCondition(cond)) {
1737     if (AreEflagsSetFrom(cond, instruction)) {
1738       if (true_target == nullptr) {
1739         __ j(X86_64IntegerCondition(cond->AsCondition()->GetOppositeCondition()), false_target);
1740       } else {
1741         __ j(X86_64IntegerCondition(cond->AsCondition()->GetCondition()), true_target);
1742       }
1743     } else {
1744       // Materialized condition, compare against 0.
1745       Location lhs = instruction->GetLocations()->InAt(condition_input_index);
1746       if (lhs.IsRegister()) {
1747         __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
1748       } else {
1749         __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0));
1750       }
1751       if (true_target == nullptr) {
1752         __ j(kEqual, false_target);
1753       } else {
1754         __ j(kNotEqual, true_target);
1755       }
1756     }
1757   } else {
1758     // Condition has not been materialized, use its inputs as the
1759     // comparison and its condition as the branch condition.
1760     HCondition* condition = cond->AsCondition();
1761 
1762     // If this is a long or FP comparison that has been folded into
1763     // the HCondition, generate the comparison directly.
1764     DataType::Type type = condition->InputAt(0)->GetType();
1765     if (type == DataType::Type::kInt64 || DataType::IsFloatingPointType(type)) {
1766       GenerateCompareTestAndBranch(condition, true_target, false_target);
1767       return;
1768     }
1769 
1770     Location lhs = condition->GetLocations()->InAt(0);
1771     Location rhs = condition->GetLocations()->InAt(1);
1772     codegen_->GenerateIntCompare(lhs, rhs);
1773     if (true_target == nullptr) {
1774       __ j(X86_64IntegerCondition(condition->GetOppositeCondition()), false_target);
1775     } else {
1776       __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
1777     }
1778   }
1779 
1780   // If neither branch falls through (case 3), the conditional branch to `true_target`
1781   // was already emitted (case 2) and we need to emit a jump to `false_target`.
1782   if (true_target != nullptr && false_target != nullptr) {
1783     __ jmp(false_target);
1784   }
1785 }
1786 
1787 void LocationsBuilderX86_64::VisitIf(HIf* if_instr) {
1788   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
1789   if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
1790     locations->SetInAt(0, Location::Any());
1791   }
1792 }
1793 
1794 void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
1795   HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
1796   HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
1797   Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
1798       nullptr : codegen_->GetLabelOf(true_successor);
1799   Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
1800       nullptr : codegen_->GetLabelOf(false_successor);
1801   GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
1802 }
1803 
1804 void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
1805   LocationSummary* locations = new (GetGraph()->GetAllocator())
1806       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
1807   InvokeRuntimeCallingConvention calling_convention;
1808   RegisterSet caller_saves = RegisterSet::Empty();
1809   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1810   locations->SetCustomSlowPathCallerSaves(caller_saves);
1811   if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
1812     locations->SetInAt(0, Location::Any());
1813   }
1814 }
1815 
1816 void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
1817   SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86_64>(deoptimize);
1818   GenerateTestAndBranch<Label>(deoptimize,
1819                                /* condition_input_index= */ 0,
1820                                slow_path->GetEntryLabel(),
1821                                /* false_target= */ nullptr);
1822 }
1823 
1824 void LocationsBuilderX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
1825   LocationSummary* locations = new (GetGraph()->GetAllocator())
1826       LocationSummary(flag, LocationSummary::kNoCall);
1827   locations->SetOut(Location::RequiresRegister());
1828 }
1829 
1830 void InstructionCodeGeneratorX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
1831   __ movl(flag->GetLocations()->Out().AsRegister<CpuRegister>(),
1832           Address(CpuRegister(RSP), codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
1833 }
1834 
1835 static bool SelectCanUseCMOV(HSelect* select) {
1836   // There are no conditional move instructions for XMMs.
1837   if (DataType::IsFloatingPointType(select->GetType())) {
1838     return false;
1839   }
1840 
1841   // A FP condition doesn't generate the single CC that we need.
1842   HInstruction* condition = select->GetCondition();
1843   if (condition->IsCondition() &&
1844       DataType::IsFloatingPointType(condition->InputAt(0)->GetType())) {
1845     return false;
1846   }
1847 
1848   // We can generate a CMOV for this Select.
1849   return true;
1850 }
1851 
1852 void LocationsBuilderX86_64::VisitSelect(HSelect* select) {
1853   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select);
1854   if (DataType::IsFloatingPointType(select->GetType())) {
1855     locations->SetInAt(0, Location::RequiresFpuRegister());
1856     locations->SetInAt(1, Location::Any());
1857   } else {
1858     locations->SetInAt(0, Location::RequiresRegister());
1859     if (SelectCanUseCMOV(select)) {
1860       if (select->InputAt(1)->IsConstant()) {
1861         locations->SetInAt(1, Location::RequiresRegister());
1862       } else {
1863         locations->SetInAt(1, Location::Any());
1864       }
1865     } else {
1866       locations->SetInAt(1, Location::Any());
1867     }
1868   }
1869   if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
1870     locations->SetInAt(2, Location::RequiresRegister());
1871   }
1872   locations->SetOut(Location::SameAsFirstInput());
1873 }
1874 
1875 void InstructionCodeGeneratorX86_64::VisitSelect(HSelect* select) {
1876   LocationSummary* locations = select->GetLocations();
1877   if (SelectCanUseCMOV(select)) {
1878     // If both the condition and the source types are integer, we can generate
1879     // a CMOV to implement Select.
1880     CpuRegister value_false = locations->InAt(0).AsRegister<CpuRegister>();
1881     Location value_true_loc = locations->InAt(1);
1882     DCHECK(locations->InAt(0).Equals(locations->Out()));
1883 
1884     HInstruction* select_condition = select->GetCondition();
1885     Condition cond = kNotEqual;
1886 
1887     // Figure out how to test the 'condition'.
1888     if (select_condition->IsCondition()) {
1889       HCondition* condition = select_condition->AsCondition();
1890       if (!condition->IsEmittedAtUseSite()) {
1891         // This was a previously materialized condition.
1892         // Can we use the existing condition code?
1893         if (AreEflagsSetFrom(condition, select)) {
1894           // Materialization was the previous instruction.  Condition codes are right.
1895           cond = X86_64IntegerCondition(condition->GetCondition());
1896         } else {
1897           // No, we have to recreate the condition code.
1898           CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
1899           __ testl(cond_reg, cond_reg);
1900         }
1901       } else {
1902         GenerateCompareTest(condition);
1903         cond = X86_64IntegerCondition(condition->GetCondition());
1904       }
1905     } else {
1906       // Must be a Boolean condition, which needs to be compared to 0.
1907       CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
1908       __ testl(cond_reg, cond_reg);
1909     }
1910 
1911     // If the condition is true, overwrite the output, which already contains false.
1912     // Generate the correct sized CMOV.
1913     bool is_64_bit = DataType::Is64BitType(select->GetType());
1914     if (value_true_loc.IsRegister()) {
1915       __ cmov(cond, value_false, value_true_loc.AsRegister<CpuRegister>(), is_64_bit);
1916     } else {
1917       __ cmov(cond,
1918               value_false,
1919               Address(CpuRegister(RSP), value_true_loc.GetStackIndex()), is_64_bit);
1920     }
1921   } else {
1922     NearLabel false_target;
1923     GenerateTestAndBranch<NearLabel>(select,
1924                                      /* condition_input_index= */ 2,
1925                                      /* true_target= */ nullptr,
1926                                      &false_target);
1927     codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
1928     __ Bind(&false_target);
1929   }
1930 }
1931 
1932 void LocationsBuilderX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
1933   new (GetGraph()->GetAllocator()) LocationSummary(info);
1934 }
1935 
1936 void InstructionCodeGeneratorX86_64::VisitNativeDebugInfo(HNativeDebugInfo*) {
1937   // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
1938 }
1939 
1940 void CodeGeneratorX86_64::GenerateNop() {
1941   __ nop();
1942 }
1943 
1944 void LocationsBuilderX86_64::HandleCondition(HCondition* cond) {
1945   LocationSummary* locations =
1946       new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall);
1947   // Handle the long/FP comparisons made in instruction simplification.
1948   switch (cond->InputAt(0)->GetType()) {
1949     case DataType::Type::kInt64:
1950       locations->SetInAt(0, Location::RequiresRegister());
1951       locations->SetInAt(1, Location::Any());
1952       break;
1953     case DataType::Type::kFloat32:
1954     case DataType::Type::kFloat64:
1955       locations->SetInAt(0, Location::RequiresFpuRegister());
1956       locations->SetInAt(1, Location::Any());
1957       break;
1958     default:
1959       locations->SetInAt(0, Location::RequiresRegister());
1960       locations->SetInAt(1, Location::Any());
1961       break;
1962   }
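  // A condition emitted at its use site is folded into its user (branch or select) and
  // needs no output register.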
1963   if (!cond->IsEmittedAtUseSite()) {
1964     locations->SetOut(Location::RequiresRegister());
1965   }
1966 }
1967 
1968 void InstructionCodeGeneratorX86_64::HandleCondition(HCondition* cond) {
1969   if (cond->IsEmittedAtUseSite()) {
1970     return;
1971   }
1972 
1973   LocationSummary* locations = cond->GetLocations();
1974   Location lhs = locations->InAt(0);
1975   Location rhs = locations->InAt(1);
1976   CpuRegister reg = locations->Out().AsRegister<CpuRegister>();
1977   NearLabel true_label, false_label;
1978 
1979   switch (cond->InputAt(0)->GetType()) {
1980     default:
1981       // Integer case.
1982 
1983       // Clear output register: setcc only sets the low byte.
1984       __ xorl(reg, reg);
1985 
1986       codegen_->GenerateIntCompare(lhs, rhs);
1987       __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
1988       return;
1989     case DataType::Type::kInt64:
1990       // Clear output register: setcc only sets the low byte.
1991       __ xorl(reg, reg);
1992 
1993       codegen_->GenerateLongCompare(lhs, rhs);
1994       __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
1995       return;
1996     case DataType::Type::kFloat32: {
1997       XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
1998       if (rhs.IsConstant()) {
1999         float value = rhs.GetConstant()->AsFloatConstant()->GetValue();
2000         __ ucomiss(lhs_reg, codegen_->LiteralFloatAddress(value));
2001       } else if (rhs.IsStackSlot()) {
2002         __ ucomiss(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
2003       } else {
2004         __ ucomiss(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
2005       }
2006       GenerateFPJumps(cond, &true_label, &false_label);
2007       break;
2008     }
2009     case DataType::Type::kFloat64: {
2010       XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
2011       if (rhs.IsConstant()) {
2012         double value = rhs.GetConstant()->AsDoubleConstant()->GetValue();
2013         __ ucomisd(lhs_reg, codegen_->LiteralDoubleAddress(value));
2014       } else if (rhs.IsDoubleStackSlot()) {
2015         __ ucomisd(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
2016       } else {
2017         __ ucomisd(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
2018       }
2019       GenerateFPJumps(cond, &true_label, &false_label);
2020       break;
2021     }
2022   }
2023 
2024   // Convert the jumps into the result.
2025   NearLabel done_label;
2026 
2027   // False case: result = 0.
2028   __ Bind(&false_label);
2029   __ xorl(reg, reg);
2030   __ jmp(&done_label);
2031 
2032   // True case: result = 1.
2033   __ Bind(&true_label);
2034   __ movl(reg, Immediate(1));
2035   __ Bind(&done_label);
2036 }
2037 
2038 void LocationsBuilderX86_64::VisitEqual(HEqual* comp) {
2039   HandleCondition(comp);
2040 }
2041 
2042 void InstructionCodeGeneratorX86_64::VisitEqual(HEqual* comp) {
2043   HandleCondition(comp);
2044 }
2045 
2046 void LocationsBuilderX86_64::VisitNotEqual(HNotEqual* comp) {
2047   HandleCondition(comp);
2048 }
2049 
2050 void InstructionCodeGeneratorX86_64::VisitNotEqual(HNotEqual* comp) {
2051   HandleCondition(comp);
2052 }
2053 
2054 void LocationsBuilderX86_64::VisitLessThan(HLessThan* comp) {
2055   HandleCondition(comp);
2056 }
2057 
2058 void InstructionCodeGeneratorX86_64::VisitLessThan(HLessThan* comp) {
2059   HandleCondition(comp);
2060 }
2061 
2062 void LocationsBuilderX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2063   HandleCondition(comp);
2064 }
2065 
2066 void InstructionCodeGeneratorX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2067   HandleCondition(comp);
2068 }
2069 
2070 void LocationsBuilderX86_64::VisitGreaterThan(HGreaterThan* comp) {
2071   HandleCondition(comp);
2072 }
2073 
2074 void InstructionCodeGeneratorX86_64::VisitGreaterThan(HGreaterThan* comp) {
2075   HandleCondition(comp);
2076 }
2077 
2078 void LocationsBuilderX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2079   HandleCondition(comp);
2080 }
2081 
2082 void InstructionCodeGeneratorX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2083   HandleCondition(comp);
2084 }
2085 
2086 void LocationsBuilderX86_64::VisitBelow(HBelow* comp) {
2087   HandleCondition(comp);
2088 }
2089 
2090 void InstructionCodeGeneratorX86_64::VisitBelow(HBelow* comp) {
2091   HandleCondition(comp);
2092 }
2093 
2094 void LocationsBuilderX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
2095   HandleCondition(comp);
2096 }
2097 
2098 void InstructionCodeGeneratorX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
2099   HandleCondition(comp);
2100 }
2101 
2102 void LocationsBuilderX86_64::VisitAbove(HAbove* comp) {
2103   HandleCondition(comp);
2104 }
2105 
2106 void InstructionCodeGeneratorX86_64::VisitAbove(HAbove* comp) {
2107   HandleCondition(comp);
2108 }
2109 
2110 void LocationsBuilderX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
2111   HandleCondition(comp);
2112 }
2113 
2114 void InstructionCodeGeneratorX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
2115   HandleCondition(comp);
2116 }
2117 
2118 void LocationsBuilderX86_64::VisitCompare(HCompare* compare) {
2119   LocationSummary* locations =
2120       new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
2121   switch (compare->InputAt(0)->GetType()) {
2122     case DataType::Type::kBool:
2123     case DataType::Type::kUint8:
2124     case DataType::Type::kInt8:
2125     case DataType::Type::kUint16:
2126     case DataType::Type::kInt16:
2127     case DataType::Type::kInt32:
2128     case DataType::Type::kInt64: {
2129       locations->SetInAt(0, Location::RequiresRegister());
2130       locations->SetInAt(1, Location::Any());
2131       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2132       break;
2133     }
2134     case DataType::Type::kFloat32:
2135     case DataType::Type::kFloat64: {
2136       locations->SetInAt(0, Location::RequiresFpuRegister());
2137       locations->SetInAt(1, Location::Any());
2138       locations->SetOut(Location::RequiresRegister());
2139       break;
2140     }
2141     default:
2142       LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
2143   }
2144 }
2145 
2146 void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) {
2147   LocationSummary* locations = compare->GetLocations();
2148   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
2149   Location left = locations->InAt(0);
2150   Location right = locations->InAt(1);
2151 
2152   NearLabel less, greater, done;
2153   DataType::Type type = compare->InputAt(0)->GetType();
2154   Condition less_cond = kLess;
2155 
2156   switch (type) {
2157     case DataType::Type::kBool:
2158     case DataType::Type::kUint8:
2159     case DataType::Type::kInt8:
2160     case DataType::Type::kUint16:
2161     case DataType::Type::kInt16:
2162     case DataType::Type::kInt32: {
2163       codegen_->GenerateIntCompare(left, right);
2164       break;
2165     }
2166     case DataType::Type::kInt64: {
2167       codegen_->GenerateLongCompare(left, right);
2168       break;
2169     }
2170     case DataType::Type::kFloat32: {
2171       XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
2172       if (right.IsConstant()) {
2173         float value = right.GetConstant()->AsFloatConstant()->GetValue();
2174         __ ucomiss(left_reg, codegen_->LiteralFloatAddress(value));
2175       } else if (right.IsStackSlot()) {
2176         __ ucomiss(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
2177       } else {
2178         __ ucomiss(left_reg, right.AsFpuRegister<XmmRegister>());
2179       }
2180       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
2181       less_cond = kBelow;  //  ucomis{s,d} sets CF
2182       break;
2183     }
2184     case DataType::Type::kFloat64: {
2185       XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
2186       if (right.IsConstant()) {
2187         double value = right.GetConstant()->AsDoubleConstant()->GetValue();
2188         __ ucomisd(left_reg, codegen_->LiteralDoubleAddress(value));
2189       } else if (right.IsDoubleStackSlot()) {
2190         __ ucomisd(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
2191       } else {
2192         __ ucomisd(left_reg, right.AsFpuRegister<XmmRegister>());
2193       }
2194       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
2195       less_cond = kBelow;  //  ucomis{s,d} sets CF
2196       break;
2197     }
2198     default:
2199       LOG(FATAL) << "Unexpected compare type " << type;
2200   }
2201 
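  // Materialize the {-1, 0, 1} result. `movl` is used rather than `xorl` so that the flags
  // from the compare above are preserved: equal -> 0, `less_cond` -> -1, otherwise 1.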
2202   __ movl(out, Immediate(0));
2203   __ j(kEqual, &done);
2204   __ j(less_cond, &less);
2205 
2206   __ Bind(&greater);
2207   __ movl(out, Immediate(1));
2208   __ jmp(&done);
2209 
2210   __ Bind(&less);
2211   __ movl(out, Immediate(-1));
2212 
2213   __ Bind(&done);
2214 }
2215 
2216 void LocationsBuilderX86_64::VisitIntConstant(HIntConstant* constant) {
2217   LocationSummary* locations =
2218       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2219   locations->SetOut(Location::ConstantLocation(constant));
2220 }
2221 
2222 void InstructionCodeGeneratorX86_64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
2223   // Will be generated at use site.
2224 }
2225 
2226 void LocationsBuilderX86_64::VisitNullConstant(HNullConstant* constant) {
2227   LocationSummary* locations =
2228       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2229   locations->SetOut(Location::ConstantLocation(constant));
2230 }
2231 
2232 void InstructionCodeGeneratorX86_64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
2233   // Will be generated at use site.
2234 }
2235 
2236 void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) {
2237   LocationSummary* locations =
2238       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2239   locations->SetOut(Location::ConstantLocation(constant));
2240 }
2241 
2242 void InstructionCodeGeneratorX86_64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
2243   // Will be generated at use site.
2244 }
2245 
2246 void LocationsBuilderX86_64::VisitFloatConstant(HFloatConstant* constant) {
2247   LocationSummary* locations =
2248       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2249   locations->SetOut(Location::ConstantLocation(constant));
2250 }
2251 
2252 void InstructionCodeGeneratorX86_64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
2253   // Will be generated at use site.
2254 }
2255 
2256 void LocationsBuilderX86_64::VisitDoubleConstant(HDoubleConstant* constant) {
2257   LocationSummary* locations =
2258       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2259   locations->SetOut(Location::ConstantLocation(constant));
2260 }
2261 
2262 void InstructionCodeGeneratorX86_64::VisitDoubleConstant(
2263     HDoubleConstant* constant ATTRIBUTE_UNUSED) {
2264   // Will be generated at use site.
2265 }
2266 
2267 void LocationsBuilderX86_64::VisitConstructorFence(HConstructorFence* constructor_fence) {
2268   constructor_fence->SetLocations(nullptr);
2269 }
2270 
2271 void InstructionCodeGeneratorX86_64::VisitConstructorFence(
2272     HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
2273   codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2274 }
2275 
2276 void LocationsBuilderX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2277   memory_barrier->SetLocations(nullptr);
2278 }
2279 
2280 void InstructionCodeGeneratorX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2281   codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
2282 }
2283 
2284 void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) {
2285   ret->SetLocations(nullptr);
2286 }
2287 
2288 void InstructionCodeGeneratorX86_64::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) {
2289   codegen_->GenerateFrameExit();
2290 }
2291 
2292 void LocationsBuilderX86_64::VisitReturn(HReturn* ret) {
2293   LocationSummary* locations =
2294       new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall);
2295   switch (ret->InputAt(0)->GetType()) {
2296     case DataType::Type::kReference:
2297     case DataType::Type::kBool:
2298     case DataType::Type::kUint8:
2299     case DataType::Type::kInt8:
2300     case DataType::Type::kUint16:
2301     case DataType::Type::kInt16:
2302     case DataType::Type::kInt32:
2303     case DataType::Type::kInt64:
2304       locations->SetInAt(0, Location::RegisterLocation(RAX));
2305       break;
2306 
2307     case DataType::Type::kFloat32:
2308     case DataType::Type::kFloat64:
2309       locations->SetInAt(0, Location::FpuRegisterLocation(XMM0));
2310       break;
2311 
2312     default:
2313       LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
2314   }
2315 }
2316 
2317 void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) {
2318   if (kIsDebugBuild) {
2319     switch (ret->InputAt(0)->GetType()) {
2320       case DataType::Type::kReference:
2321       case DataType::Type::kBool:
2322       case DataType::Type::kUint8:
2323       case DataType::Type::kInt8:
2324       case DataType::Type::kUint16:
2325       case DataType::Type::kInt16:
2326       case DataType::Type::kInt32:
2327       case DataType::Type::kInt64:
2328         DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<CpuRegister>().AsRegister(), RAX);
2329         break;
2330 
2331       case DataType::Type::kFloat32:
2332       case DataType::Type::kFloat64:
2333         DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(),
2334                   XMM0);
2335         break;
2336 
2337       default:
2338         LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
2339     }
2340   }
2341   codegen_->GenerateFrameExit();
2342 }
2343 
2344 Location InvokeDexCallingConventionVisitorX86_64::GetReturnLocation(DataType::Type type) const {
2345   switch (type) {
2346     case DataType::Type::kReference:
2347     case DataType::Type::kBool:
2348     case DataType::Type::kUint8:
2349     case DataType::Type::kInt8:
2350     case DataType::Type::kUint16:
2351     case DataType::Type::kInt16:
2352     case DataType::Type::kUint32:
2353     case DataType::Type::kInt32:
2354     case DataType::Type::kUint64:
2355     case DataType::Type::kInt64:
2356       return Location::RegisterLocation(RAX);
2357 
2358     case DataType::Type::kVoid:
2359       return Location::NoLocation();
2360 
2361     case DataType::Type::kFloat64:
2362     case DataType::Type::kFloat32:
2363       return Location::FpuRegisterLocation(XMM0);
2364   }
2365 
2366   UNREACHABLE();
2367 }
2368 
2369 Location InvokeDexCallingConventionVisitorX86_64::GetMethodLocation() const {
2370   return Location::RegisterLocation(kMethodRegisterArgument);
2371 }
2372 
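// Assigns argument locations in order: integral and reference arguments take the next GP
// argument register, FP arguments the next XMM argument register, and arguments that no
// longer fit in registers go to stack slots; `stack_index_` advances for every argument so
// the computed stack offsets remain correct.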
2373 Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(DataType::Type type) {
2374   switch (type) {
2375     case DataType::Type::kReference:
2376     case DataType::Type::kBool:
2377     case DataType::Type::kUint8:
2378     case DataType::Type::kInt8:
2379     case DataType::Type::kUint16:
2380     case DataType::Type::kInt16:
2381     case DataType::Type::kInt32: {
2382       uint32_t index = gp_index_++;
2383       stack_index_++;
2384       if (index < calling_convention.GetNumberOfRegisters()) {
2385         return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
2386       } else {
2387         return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
2388       }
2389     }
2390 
2391     case DataType::Type::kInt64: {
2392       uint32_t index = gp_index_;
2393       stack_index_ += 2;
2394       if (index < calling_convention.GetNumberOfRegisters()) {
2395         gp_index_ += 1;
2396         return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
2397       } else {
2398         gp_index_ += 2;
2399         return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
2400       }
2401     }
2402 
2403     case DataType::Type::kFloat32: {
2404       uint32_t index = float_index_++;
2405       stack_index_++;
2406       if (index < calling_convention.GetNumberOfFpuRegisters()) {
2407         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
2408       } else {
2409         return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
2410       }
2411     }
2412 
2413     case DataType::Type::kFloat64: {
2414       uint32_t index = float_index_++;
2415       stack_index_ += 2;
2416       if (index < calling_convention.GetNumberOfFpuRegisters()) {
2417         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
2418       } else {
2419         return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
2420       }
2421     }
2422 
2423     case DataType::Type::kUint32:
2424     case DataType::Type::kUint64:
2425     case DataType::Type::kVoid:
2426       LOG(FATAL) << "Unexpected parameter type " << type;
2427       UNREACHABLE();
2428   }
2429   return Location::NoLocation();
2430 }
2431 
2432 void LocationsBuilderX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2433   // The trampoline uses the same calling convention as a regular dex call,
2434   // except that instead of loading arg0/r0 with the target Method*, arg0/r0 contains
2435   // the method_idx.
2436   HandleInvoke(invoke);
2437 }
2438 
2439 void InstructionCodeGeneratorX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2440   codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
2441 }
2442 
2443 void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2444   // Explicit clinit checks triggered by static invokes must have been pruned by
2445   // art::PrepareForRegisterAllocation.
2446   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2447 
2448   IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
2449   if (intrinsic.TryDispatch(invoke)) {
2450     return;
2451   }
2452 
2453   HandleInvoke(invoke);
2454 }
2455 
2456 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
2457   if (invoke->GetLocations()->Intrinsified()) {
2458     IntrinsicCodeGeneratorX86_64 intrinsic(codegen);
2459     intrinsic.Dispatch(invoke);
2460     return true;
2461   }
2462   return false;
2463 }
2464 
2465 void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2466   // Explicit clinit checks triggered by static invokes must have been pruned by
2467   // art::PrepareForRegisterAllocation.
2468   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2469 
2470   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2471     return;
2472   }
2473 
2474   LocationSummary* locations = invoke->GetLocations();
2475   codegen_->GenerateStaticOrDirectCall(
2476       invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
2477 }
2478 
2479 void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) {
2480   InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
2481   CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
2482 }
2483 
2484 void LocationsBuilderX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2485   IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
2486   if (intrinsic.TryDispatch(invoke)) {
2487     return;
2488   }
2489 
2490   HandleInvoke(invoke);
2491 }
2492 
2493 void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2494   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2495     return;
2496   }
2497 
2498   codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
2499   DCHECK(!codegen_->IsLeafMethod());
2500 }
2501 
2502 void LocationsBuilderX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
2503   HandleInvoke(invoke);
2504   // Add the hidden argument.
2505   invoke->GetLocations()->AddTemp(Location::RegisterLocation(RAX));
2506 }
2507 
2508 void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
2509   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
2510   LocationSummary* locations = invoke->GetLocations();
2511   CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
2512   CpuRegister hidden_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
2513   Location receiver = locations->InAt(0);
2514   size_t class_offset = mirror::Object::ClassOffset().SizeValue();
2515 
2516   // Set the hidden argument. This is safe to do here, as RAX
2517   // won't be modified thereafter, before the `call` instruction.
2518   DCHECK_EQ(RAX, hidden_reg.AsRegister());
2519   codegen_->Load64BitValue(hidden_reg, invoke->GetDexMethodIndex());
2520 
2521   if (receiver.IsStackSlot()) {
2522     __ movl(temp, Address(CpuRegister(RSP), receiver.GetStackIndex()));
2523     // /* HeapReference<Class> */ temp = temp->klass_
2524     __ movl(temp, Address(temp, class_offset));
2525   } else {
2526     // /* HeapReference<Class> */ temp = receiver->klass_
2527     __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset));
2528   }
2529   codegen_->MaybeRecordImplicitNullCheck(invoke);
2530   // Instead of simply (possibly) unpoisoning `temp` here, we should
2531   // emit a read barrier for the previous class reference load.
2532   // However this is not required in practice, as this is an
2533   // intermediate/temporary reference and because the current
2534   // concurrent copying collector keeps the from-space memory
2535   // intact/accessible until the end of the marking phase (a future
2536   // collector may not provide this guarantee).
2537   __ MaybeUnpoisonHeapReference(temp);
2538   // temp = temp->GetAddressOfIMT()
2539   __ movq(temp,
2540       Address(temp, mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
2541   // method_offset = ImTable::OffsetOfElement(imt_index).
2542   uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
2543       invoke->GetImtIndex(), kX86_64PointerSize));
2544   // temp = temp->GetImtEntryAt(method_offset);
2545   __ movq(temp, Address(temp, method_offset));
2546   // call temp->GetEntryPoint();
2547   __ call(Address(
2548       temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64PointerSize).SizeValue()));
2549 
2550   DCHECK(!codegen_->IsLeafMethod());
2551   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2552 }
2553 
2554 void LocationsBuilderX86_64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2555   HandleInvoke(invoke);
2556 }
2557 
2558 void InstructionCodeGeneratorX86_64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2559   codegen_->GenerateInvokePolymorphicCall(invoke);
2560 }
2561 
2562 void LocationsBuilderX86_64::VisitInvokeCustom(HInvokeCustom* invoke) {
2563   HandleInvoke(invoke);
2564 }
2565 
2566 void InstructionCodeGeneratorX86_64::VisitInvokeCustom(HInvokeCustom* invoke) {
2567   codegen_->GenerateInvokeCustomCall(invoke);
2568 }
2569 
2570 void LocationsBuilderX86_64::VisitNeg(HNeg* neg) {
2571   LocationSummary* locations =
2572       new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
2573   switch (neg->GetResultType()) {
2574     case DataType::Type::kInt32:
2575     case DataType::Type::kInt64:
2576       locations->SetInAt(0, Location::RequiresRegister());
2577       locations->SetOut(Location::SameAsFirstInput());
2578       break;
2579 
2580     case DataType::Type::kFloat32:
2581     case DataType::Type::kFloat64:
2582       locations->SetInAt(0, Location::RequiresFpuRegister());
2583       locations->SetOut(Location::SameAsFirstInput());
2584       locations->AddTemp(Location::RequiresFpuRegister());
2585       break;
2586 
2587     default:
2588       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2589   }
2590 }
2591 
2592 void InstructionCodeGeneratorX86_64::VisitNeg(HNeg* neg) {
2593   LocationSummary* locations = neg->GetLocations();
2594   Location out = locations->Out();
2595   Location in = locations->InAt(0);
2596   switch (neg->GetResultType()) {
2597     case DataType::Type::kInt32:
2598       DCHECK(in.IsRegister());
2599       DCHECK(in.Equals(out));
2600       __ negl(out.AsRegister<CpuRegister>());
2601       break;
2602 
2603     case DataType::Type::kInt64:
2604       DCHECK(in.IsRegister());
2605       DCHECK(in.Equals(out));
2606       __ negq(out.AsRegister<CpuRegister>());
2607       break;
2608 
2609     case DataType::Type::kFloat32: {
2610       DCHECK(in.Equals(out));
2611       XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2612       // Implement float negation with an exclusive or with value
2613       // 0x80000000 (mask for bit 31, representing the sign of a
2614       // single-precision floating-point number).
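      // For example, 1.0f has the bit pattern 0x3F800000; XOR-ing it with the mask
      // yields 0xBF800000, which is -1.0f.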
2615       __ movss(mask, codegen_->LiteralInt32Address(0x80000000));
2616       __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
2617       break;
2618     }
2619 
2620     case DataType::Type::kFloat64: {
2621       DCHECK(in.Equals(out));
2622       XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2623       // Implement double negation with an exclusive or with value
2624       // 0x8000000000000000 (mask for bit 63, representing the sign of
2625       // a double-precision floating-point number).
2626       __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000)));
2627       __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
2628       break;
2629     }
2630 
2631     default:
2632       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2633   }
2634 }
2635 
2636 void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) {
2637   LocationSummary* locations =
2638       new (GetGraph()->GetAllocator()) LocationSummary(conversion, LocationSummary::kNoCall);
2639   DataType::Type result_type = conversion->GetResultType();
2640   DataType::Type input_type = conversion->GetInputType();
2641   DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
2642       << input_type << " -> " << result_type;
2643 
2644   switch (result_type) {
2645     case DataType::Type::kUint8:
2646     case DataType::Type::kInt8:
2647     case DataType::Type::kUint16:
2648     case DataType::Type::kInt16:
2649       DCHECK(DataType::IsIntegralType(input_type)) << input_type;
2650       locations->SetInAt(0, Location::Any());
2651       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2652       break;
2653 
2654     case DataType::Type::kInt32:
2655       switch (input_type) {
2656         case DataType::Type::kInt64:
2657           locations->SetInAt(0, Location::Any());
2658           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2659           break;
2660 
2661         case DataType::Type::kFloat32:
2662           locations->SetInAt(0, Location::RequiresFpuRegister());
2663           locations->SetOut(Location::RequiresRegister());
2664           break;
2665 
2666         case DataType::Type::kFloat64:
2667           locations->SetInAt(0, Location::RequiresFpuRegister());
2668           locations->SetOut(Location::RequiresRegister());
2669           break;
2670 
2671         default:
2672           LOG(FATAL) << "Unexpected type conversion from " << input_type
2673                      << " to " << result_type;
2674       }
2675       break;
2676 
2677     case DataType::Type::kInt64:
2678       switch (input_type) {
2679         case DataType::Type::kBool:
2680         case DataType::Type::kUint8:
2681         case DataType::Type::kInt8:
2682         case DataType::Type::kUint16:
2683         case DataType::Type::kInt16:
2684         case DataType::Type::kInt32:
2685           // TODO: We would benefit from a (to-be-implemented)
2686           // Location::RegisterOrStackSlot requirement for this input.
2687           locations->SetInAt(0, Location::RequiresRegister());
2688           locations->SetOut(Location::RequiresRegister());
2689           break;
2690 
2691         case DataType::Type::kFloat32:
2692           locations->SetInAt(0, Location::RequiresFpuRegister());
2693           locations->SetOut(Location::RequiresRegister());
2694           break;
2695 
2696         case DataType::Type::kFloat64:
2697           locations->SetInAt(0, Location::RequiresFpuRegister());
2698           locations->SetOut(Location::RequiresRegister());
2699           break;
2700 
2701         default:
2702           LOG(FATAL) << "Unexpected type conversion from " << input_type
2703                      << " to " << result_type;
2704       }
2705       break;
2706 
2707     case DataType::Type::kFloat32:
2708       switch (input_type) {
2709         case DataType::Type::kBool:
2710         case DataType::Type::kUint8:
2711         case DataType::Type::kInt8:
2712         case DataType::Type::kUint16:
2713         case DataType::Type::kInt16:
2714         case DataType::Type::kInt32:
2715           locations->SetInAt(0, Location::Any());
2716           locations->SetOut(Location::RequiresFpuRegister());
2717           break;
2718 
2719         case DataType::Type::kInt64:
2720           locations->SetInAt(0, Location::Any());
2721           locations->SetOut(Location::RequiresFpuRegister());
2722           break;
2723 
2724         case DataType::Type::kFloat64:
2725           locations->SetInAt(0, Location::Any());
2726           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2727           break;
2728 
2729         default:
2730           LOG(FATAL) << "Unexpected type conversion from " << input_type
2731                      << " to " << result_type;
2732       }
2733       break;
2734 
2735     case DataType::Type::kFloat64:
2736       switch (input_type) {
2737         case DataType::Type::kBool:
2738         case DataType::Type::kUint8:
2739         case DataType::Type::kInt8:
2740         case DataType::Type::kUint16:
2741         case DataType::Type::kInt16:
2742         case DataType::Type::kInt32:
2743           locations->SetInAt(0, Location::Any());
2744           locations->SetOut(Location::RequiresFpuRegister());
2745           break;
2746 
2747         case DataType::Type::kInt64:
2748           locations->SetInAt(0, Location::Any());
2749           locations->SetOut(Location::RequiresFpuRegister());
2750           break;
2751 
2752         case DataType::Type::kFloat32:
2753           locations->SetInAt(0, Location::Any());
2754           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2755           break;
2756 
2757         default:
2758           LOG(FATAL) << "Unexpected type conversion from " << input_type
2759                      << " to " << result_type;
2760       }
2761       break;
2762 
2763     default:
2764       LOG(FATAL) << "Unexpected type conversion from " << input_type
2765                  << " to " << result_type;
2766   }
2767 }
2768 
2769 void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conversion) {
2770   LocationSummary* locations = conversion->GetLocations();
2771   Location out = locations->Out();
2772   Location in = locations->InAt(0);
2773   DataType::Type result_type = conversion->GetResultType();
2774   DataType::Type input_type = conversion->GetInputType();
2775   DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
2776       << input_type << " -> " << result_type;
2777   switch (result_type) {
2778     case DataType::Type::kUint8:
2779       switch (input_type) {
2780         case DataType::Type::kInt8:
2781         case DataType::Type::kUint16:
2782         case DataType::Type::kInt16:
2783         case DataType::Type::kInt32:
2784         case DataType::Type::kInt64:
2785           if (in.IsRegister()) {
2786             __ movzxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2787           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
2788             __ movzxb(out.AsRegister<CpuRegister>(),
2789                       Address(CpuRegister(RSP), in.GetStackIndex()));
2790           } else {
2791             __ movl(out.AsRegister<CpuRegister>(),
2792                     Immediate(static_cast<uint8_t>(Int64FromConstant(in.GetConstant()))));
2793           }
2794           break;
2795 
2796         default:
2797           LOG(FATAL) << "Unexpected type conversion from " << input_type
2798                      << " to " << result_type;
2799       }
2800       break;
2801 
2802     case DataType::Type::kInt8:
2803       switch (input_type) {
2804         case DataType::Type::kUint8:
2805         case DataType::Type::kUint16:
2806         case DataType::Type::kInt16:
2807         case DataType::Type::kInt32:
2808         case DataType::Type::kInt64:
2809           if (in.IsRegister()) {
2810             __ movsxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2811           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
2812             __ movsxb(out.AsRegister<CpuRegister>(),
2813                       Address(CpuRegister(RSP), in.GetStackIndex()));
2814           } else {
2815             __ movl(out.AsRegister<CpuRegister>(),
2816                     Immediate(static_cast<int8_t>(Int64FromConstant(in.GetConstant()))));
2817           }
2818           break;
2819 
2820         default:
2821           LOG(FATAL) << "Unexpected type conversion from " << input_type
2822                      << " to " << result_type;
2823       }
2824       break;
2825 
2826     case DataType::Type::kUint16:
2827       switch (input_type) {
2828         case DataType::Type::kInt8:
2829         case DataType::Type::kInt16:
2830         case DataType::Type::kInt32:
2831         case DataType::Type::kInt64:
2832           if (in.IsRegister()) {
2833             __ movzxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2834           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
2835             __ movzxw(out.AsRegister<CpuRegister>(),
2836                       Address(CpuRegister(RSP), in.GetStackIndex()));
2837           } else {
2838             __ movl(out.AsRegister<CpuRegister>(),
2839                     Immediate(static_cast<uint16_t>(Int64FromConstant(in.GetConstant()))));
2840           }
2841           break;
2842 
2843         default:
2844           LOG(FATAL) << "Unexpected type conversion from " << input_type
2845                      << " to " << result_type;
2846       }
2847       break;
2848 
2849     case DataType::Type::kInt16:
2850       switch (input_type) {
2851         case DataType::Type::kUint16:
2852         case DataType::Type::kInt32:
2853         case DataType::Type::kInt64:
2854           if (in.IsRegister()) {
2855             __ movsxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2856           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
2857             __ movsxw(out.AsRegister<CpuRegister>(),
2858                       Address(CpuRegister(RSP), in.GetStackIndex()));
2859           } else {
2860             __ movl(out.AsRegister<CpuRegister>(),
2861                     Immediate(static_cast<int16_t>(Int64FromConstant(in.GetConstant()))));
2862           }
2863           break;
2864 
2865         default:
2866           LOG(FATAL) << "Unexpected type conversion from " << input_type
2867                      << " to " << result_type;
2868       }
2869       break;
2870 
2871     case DataType::Type::kInt32:
2872       switch (input_type) {
2873         case DataType::Type::kInt64:
2874           if (in.IsRegister()) {
2875             __ movl(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2876           } else if (in.IsDoubleStackSlot()) {
2877             __ movl(out.AsRegister<CpuRegister>(),
2878                     Address(CpuRegister(RSP), in.GetStackIndex()));
2879           } else {
2880             DCHECK(in.IsConstant());
2881             DCHECK(in.GetConstant()->IsLongConstant());
2882             int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
2883             __ movl(out.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
2884           }
2885           break;
2886 
2887         case DataType::Type::kFloat32: {
2888           XmmRegister input = in.AsFpuRegister<XmmRegister>();
2889           CpuRegister output = out.AsRegister<CpuRegister>();
2890           NearLabel done, nan;
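          // Java float-to-int semantics: NaN converts to 0 and values >= 2^31 clamp to
          // INT_MAX. Very negative inputs are covered by cvttss2si itself, whose
          // overflow result (0x80000000) is already INT_MIN.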
2891 
2892           __ movl(output, Immediate(kPrimIntMax));
2893           // if input >= (float)INT_MAX goto done
2894           __ comiss(input, codegen_->LiteralFloatAddress(kPrimIntMax));
2895           __ j(kAboveEqual, &done);
2896           // if input == NaN goto nan
2897           __ j(kUnordered, &nan);
2898           // output = float-to-int-truncate(input)
2899           __ cvttss2si(output, input, false);
2900           __ jmp(&done);
2901           __ Bind(&nan);
2902           //  output = 0
2903           __ xorl(output, output);
2904           __ Bind(&done);
2905           break;
2906         }
2907 
2908         case DataType::Type::kFloat64: {
2909           XmmRegister input = in.AsFpuRegister<XmmRegister>();
2910           CpuRegister output = out.AsRegister<CpuRegister>();
2911           NearLabel done, nan;
2912 
2913           __ movl(output, Immediate(kPrimIntMax));
2914           // if input >= (double)INT_MAX goto done
2915           __ comisd(input, codegen_->LiteralDoubleAddress(kPrimIntMax));
2916           __ j(kAboveEqual, &done);
2917           // if input == NaN goto nan
2918           __ j(kUnordered, &nan);
2919           // output = double-to-int-truncate(input)
2920           __ cvttsd2si(output, input);
2921           __ jmp(&done);
2922           __ Bind(&nan);
2923           //  output = 0
2924           __ xorl(output, output);
2925           __ Bind(&done);
2926           break;
2927         }
2928 
2929         default:
2930           LOG(FATAL) << "Unexpected type conversion from " << input_type
2931                      << " to " << result_type;
2932       }
2933       break;
2934 
2935     case DataType::Type::kInt64:
      DCHECK(out.IsRegister());
      switch (input_type) {
        case DataType::Type::kBool:
2939         case DataType::Type::kUint8:
2940         case DataType::Type::kInt8:
2941         case DataType::Type::kUint16:
2942         case DataType::Type::kInt16:
2943         case DataType::Type::kInt32:
2944           DCHECK(in.IsRegister());
2945           __ movsxd(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2946           break;
2947 
2948         case DataType::Type::kFloat32: {
2949           XmmRegister input = in.AsFpuRegister<XmmRegister>();
2950           CpuRegister output = out.AsRegister<CpuRegister>();
2951           NearLabel done, nan;
2952 
2953           codegen_->Load64BitValue(output, kPrimLongMax);
2954           // if input >= (float)LONG_MAX goto done
2955           __ comiss(input, codegen_->LiteralFloatAddress(kPrimLongMax));
2956           __ j(kAboveEqual, &done);
2957           // if input == NaN goto nan
2958           __ j(kUnordered, &nan);
2959           // output = float-to-long-truncate(input)
2960           __ cvttss2si(output, input, true);
2961           __ jmp(&done);
2962           __ Bind(&nan);
2963           //  output = 0
2964           __ xorl(output, output);
2965           __ Bind(&done);
2966           break;
2967         }
2968 
2969         case DataType::Type::kFloat64: {
2970           XmmRegister input = in.AsFpuRegister<XmmRegister>();
2971           CpuRegister output = out.AsRegister<CpuRegister>();
2972           NearLabel done, nan;
2973 
2974           codegen_->Load64BitValue(output, kPrimLongMax);
2975           // if input >= (double)LONG_MAX goto done
2976           __ comisd(input, codegen_->LiteralDoubleAddress(kPrimLongMax));
2977           __ j(kAboveEqual, &done);
2978           // if input == NaN goto nan
2979           __ j(kUnordered, &nan);
2980           // output = double-to-long-truncate(input)
2981           __ cvttsd2si(output, input, true);
2982           __ jmp(&done);
2983           __ Bind(&nan);
2984           //  output = 0
2985           __ xorl(output, output);
2986           __ Bind(&done);
2987           break;
2988         }
2989 
2990         default:
2991           LOG(FATAL) << "Unexpected type conversion from " << input_type
2992                      << " to " << result_type;
2993       }
2994       break;
2995 
2996     case DataType::Type::kFloat32:
2997       switch (input_type) {
2998         case DataType::Type::kBool:
2999         case DataType::Type::kUint8:
3000         case DataType::Type::kInt8:
3001         case DataType::Type::kUint16:
3002         case DataType::Type::kInt16:
3003         case DataType::Type::kInt32:
3004           if (in.IsRegister()) {
3005             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
3006           } else if (in.IsConstant()) {
3007             int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
3008             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3009             codegen_->Load32BitValue(dest, static_cast<float>(v));
3010           } else {
3011             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
3012                         Address(CpuRegister(RSP), in.GetStackIndex()), false);
3013           }
3014           break;
3015 
3016         case DataType::Type::kInt64:
3017           if (in.IsRegister()) {
3018             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
3019           } else if (in.IsConstant()) {
3020             int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
3021             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3022             codegen_->Load32BitValue(dest, static_cast<float>(v));
3023           } else {
3024             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
3025                         Address(CpuRegister(RSP), in.GetStackIndex()), true);
3026           }
3027           break;
3028 
3029         case DataType::Type::kFloat64:
3030           if (in.IsFpuRegister()) {
3031             __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3032           } else if (in.IsConstant()) {
3033             double v = in.GetConstant()->AsDoubleConstant()->GetValue();
3034             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3035             codegen_->Load32BitValue(dest, static_cast<float>(v));
3036           } else {
3037             __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(),
3038                         Address(CpuRegister(RSP), in.GetStackIndex()));
3039           }
3040           break;
3041 
3042         default:
3043           LOG(FATAL) << "Unexpected type conversion from " << input_type
3044                      << " to " << result_type;
3045       }
3046       break;
3047 
3048     case DataType::Type::kFloat64:
3049       switch (input_type) {
3050         case DataType::Type::kBool:
3051         case DataType::Type::kUint8:
3052         case DataType::Type::kInt8:
3053         case DataType::Type::kUint16:
3054         case DataType::Type::kInt16:
3055         case DataType::Type::kInt32:
3056           if (in.IsRegister()) {
3057             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
3058           } else if (in.IsConstant()) {
3059             int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
3060             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3061             codegen_->Load64BitValue(dest, static_cast<double>(v));
3062           } else {
3063             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
3064                         Address(CpuRegister(RSP), in.GetStackIndex()), false);
3065           }
3066           break;
3067 
3068         case DataType::Type::kInt64:
3069           if (in.IsRegister()) {
3070             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
3071           } else if (in.IsConstant()) {
3072             int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
3073             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3074             codegen_->Load64BitValue(dest, static_cast<double>(v));
3075           } else {
3076             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
3077                         Address(CpuRegister(RSP), in.GetStackIndex()), true);
3078           }
3079           break;
3080 
3081         case DataType::Type::kFloat32:
3082           if (in.IsFpuRegister()) {
3083             __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3084           } else if (in.IsConstant()) {
3085             float v = in.GetConstant()->AsFloatConstant()->GetValue();
3086             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3087             codegen_->Load64BitValue(dest, static_cast<double>(v));
3088           } else {
3089             __ cvtss2sd(out.AsFpuRegister<XmmRegister>(),
3090                         Address(CpuRegister(RSP), in.GetStackIndex()));
3091           }
3092           break;
3093 
3094         default:
3095           LOG(FATAL) << "Unexpected type conversion from " << input_type
3096                      << " to " << result_type;
3097       }
3098       break;
3099 
3100     default:
3101       LOG(FATAL) << "Unexpected type conversion from " << input_type
3102                  << " to " << result_type;
3103   }
3104 }
3105 
3106 void LocationsBuilderX86_64::VisitAdd(HAdd* add) {
3107   LocationSummary* locations =
3108       new (GetGraph()->GetAllocator()) LocationSummary(add, LocationSummary::kNoCall);
3109   switch (add->GetResultType()) {
3110     case DataType::Type::kInt32: {
3111       locations->SetInAt(0, Location::RequiresRegister());
3112       locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
3113       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3114       break;
3115     }
3116 
3117     case DataType::Type::kInt64: {
3118       locations->SetInAt(0, Location::RequiresRegister());
3119       // We can use a leaq or addq if the constant can fit in an immediate.
3120       locations->SetInAt(1, Location::RegisterOrInt32Constant(add->InputAt(1)));
3121       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3122       break;
3123     }
3124 
3125     case DataType::Type::kFloat64:
3126     case DataType::Type::kFloat32: {
3127       locations->SetInAt(0, Location::RequiresFpuRegister());
3128       locations->SetInAt(1, Location::Any());
3129       locations->SetOut(Location::SameAsFirstInput());
3130       break;
3131     }
3132 
3133     default:
3134       LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3135   }
3136 }
3137 
3138 void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) {
3139   LocationSummary* locations = add->GetLocations();
3140   Location first = locations->InAt(0);
3141   Location second = locations->InAt(1);
3142   Location out = locations->Out();
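  // When the output register differs from both inputs, LEA gives a non-destructive
  // three-operand add; e.g. with out=RCX, first=RAX, second=RDX (illustrative
  // registers) we emit leal/leaq rcx, [rax + rdx] instead of a move followed by an add.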
3143 
3144   switch (add->GetResultType()) {
3145     case DataType::Type::kInt32: {
3146       if (second.IsRegister()) {
3147         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3148           __ addl(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3149         } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3150           __ addl(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
3151         } else {
3152           __ leal(out.AsRegister<CpuRegister>(), Address(
3153               first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
3154         }
3155       } else if (second.IsConstant()) {
3156         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3157           __ addl(out.AsRegister<CpuRegister>(),
3158                   Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
3159         } else {
3160           __ leal(out.AsRegister<CpuRegister>(), Address(
3161               first.AsRegister<CpuRegister>(), second.GetConstant()->AsIntConstant()->GetValue()));
3162         }
3163       } else {
3164         DCHECK(first.Equals(locations->Out()));
3165         __ addl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
3166       }
3167       break;
3168     }
3169 
3170     case DataType::Type::kInt64: {
3171       if (second.IsRegister()) {
3172         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3173           __ addq(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3174         } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3175           __ addq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
3176         } else {
3177           __ leaq(out.AsRegister<CpuRegister>(), Address(
3178               first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
3179         }
3180       } else {
3181         DCHECK(second.IsConstant());
3182         int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3183         int32_t int32_value = Low32Bits(value);
3184         DCHECK_EQ(int32_value, value);
3185         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3186           __ addq(out.AsRegister<CpuRegister>(), Immediate(int32_value));
3187         } else {
3188           __ leaq(out.AsRegister<CpuRegister>(), Address(
3189               first.AsRegister<CpuRegister>(), int32_value));
3190         }
3191       }
3192       break;
3193     }
3194 
3195     case DataType::Type::kFloat32: {
3196       if (second.IsFpuRegister()) {
3197         __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3198       } else if (second.IsConstant()) {
3199         __ addss(first.AsFpuRegister<XmmRegister>(),
3200                  codegen_->LiteralFloatAddress(
3201                      second.GetConstant()->AsFloatConstant()->GetValue()));
3202       } else {
3203         DCHECK(second.IsStackSlot());
3204         __ addss(first.AsFpuRegister<XmmRegister>(),
3205                  Address(CpuRegister(RSP), second.GetStackIndex()));
3206       }
3207       break;
3208     }
3209 
3210     case DataType::Type::kFloat64: {
3211       if (second.IsFpuRegister()) {
3212         __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3213       } else if (second.IsConstant()) {
3214         __ addsd(first.AsFpuRegister<XmmRegister>(),
3215                  codegen_->LiteralDoubleAddress(
3216                      second.GetConstant()->AsDoubleConstant()->GetValue()));
3217       } else {
3218         DCHECK(second.IsDoubleStackSlot());
3219         __ addsd(first.AsFpuRegister<XmmRegister>(),
3220                  Address(CpuRegister(RSP), second.GetStackIndex()));
3221       }
3222       break;
3223     }
3224 
3225     default:
3226       LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3227   }
3228 }
3229 
3230 void LocationsBuilderX86_64::VisitSub(HSub* sub) {
3231   LocationSummary* locations =
3232       new (GetGraph()->GetAllocator()) LocationSummary(sub, LocationSummary::kNoCall);
3233   switch (sub->GetResultType()) {
3234     case DataType::Type::kInt32: {
3235       locations->SetInAt(0, Location::RequiresRegister());
3236       locations->SetInAt(1, Location::Any());
3237       locations->SetOut(Location::SameAsFirstInput());
3238       break;
3239     }
3240     case DataType::Type::kInt64: {
3241       locations->SetInAt(0, Location::RequiresRegister());
3242       locations->SetInAt(1, Location::RegisterOrInt32Constant(sub->InputAt(1)));
3243       locations->SetOut(Location::SameAsFirstInput());
3244       break;
3245     }
3246     case DataType::Type::kFloat32:
3247     case DataType::Type::kFloat64: {
3248       locations->SetInAt(0, Location::RequiresFpuRegister());
3249       locations->SetInAt(1, Location::Any());
3250       locations->SetOut(Location::SameAsFirstInput());
3251       break;
3252     }
3253     default:
3254       LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3255   }
3256 }
3257 
3258 void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) {
3259   LocationSummary* locations = sub->GetLocations();
3260   Location first = locations->InAt(0);
3261   Location second = locations->InAt(1);
3262   DCHECK(first.Equals(locations->Out()));
3263   switch (sub->GetResultType()) {
3264     case DataType::Type::kInt32: {
3265       if (second.IsRegister()) {
3266         __ subl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3267       } else if (second.IsConstant()) {
3268         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
3269         __ subl(first.AsRegister<CpuRegister>(), imm);
3270       } else {
3271         __ subl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
3272       }
3273       break;
3274     }
3275     case DataType::Type::kInt64: {
3276       if (second.IsConstant()) {
3277         int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3278         DCHECK(IsInt<32>(value));
3279         __ subq(first.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
3280       } else {
3281         __ subq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3282       }
3283       break;
3284     }
3285 
3286     case DataType::Type::kFloat32: {
3287       if (second.IsFpuRegister()) {
3288         __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3289       } else if (second.IsConstant()) {
3290         __ subss(first.AsFpuRegister<XmmRegister>(),
3291                  codegen_->LiteralFloatAddress(
3292                      second.GetConstant()->AsFloatConstant()->GetValue()));
3293       } else {
3294         DCHECK(second.IsStackSlot());
3295         __ subss(first.AsFpuRegister<XmmRegister>(),
3296                  Address(CpuRegister(RSP), second.GetStackIndex()));
3297       }
3298       break;
3299     }
3300 
3301     case DataType::Type::kFloat64: {
3302       if (second.IsFpuRegister()) {
3303         __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3304       } else if (second.IsConstant()) {
3305         __ subsd(first.AsFpuRegister<XmmRegister>(),
3306                  codegen_->LiteralDoubleAddress(
3307                      second.GetConstant()->AsDoubleConstant()->GetValue()));
3308       } else {
3309         DCHECK(second.IsDoubleStackSlot());
3310         __ subsd(first.AsFpuRegister<XmmRegister>(),
3311                  Address(CpuRegister(RSP), second.GetStackIndex()));
3312       }
3313       break;
3314     }
3315 
3316     default:
3317       LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3318   }
3319 }
3320 
3321 void LocationsBuilderX86_64::VisitMul(HMul* mul) {
3322   LocationSummary* locations =
3323       new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
3324   switch (mul->GetResultType()) {
3325     case DataType::Type::kInt32: {
3326       locations->SetInAt(0, Location::RequiresRegister());
3327       locations->SetInAt(1, Location::Any());
3328       if (mul->InputAt(1)->IsIntConstant()) {
3329         // Can use 3 operand multiply.
3330         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3331       } else {
3332         locations->SetOut(Location::SameAsFirstInput());
3333       }
3334       break;
3335     }
3336     case DataType::Type::kInt64: {
3337       locations->SetInAt(0, Location::RequiresRegister());
3338       locations->SetInAt(1, Location::Any());
3339       if (mul->InputAt(1)->IsLongConstant() &&
3340           IsInt<32>(mul->InputAt(1)->AsLongConstant()->GetValue())) {
3341         // Can use 3 operand multiply.
3342         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3343       } else {
3344         locations->SetOut(Location::SameAsFirstInput());
3345       }
3346       break;
3347     }
3348     case DataType::Type::kFloat32:
3349     case DataType::Type::kFloat64: {
3350       locations->SetInAt(0, Location::RequiresFpuRegister());
3351       locations->SetInAt(1, Location::Any());
3352       locations->SetOut(Location::SameAsFirstInput());
3353       break;
3354     }
3355 
3356     default:
3357       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3358   }
3359 }
3360 
3361 void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) {
3362   LocationSummary* locations = mul->GetLocations();
3363   Location first = locations->InAt(0);
3364   Location second = locations->InAt(1);
3365   Location out = locations->Out();
3366   switch (mul->GetResultType()) {
3367     case DataType::Type::kInt32:
3368       // The constant may have ended up in a register, so test explicitly to avoid
3369       // problems where the output may not be the same as the first operand.
3370       if (mul->InputAt(1)->IsIntConstant()) {
3371         Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
3372         __ imull(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), imm);
3373       } else if (second.IsRegister()) {
3374         DCHECK(first.Equals(out));
3375         __ imull(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3376       } else {
3377         DCHECK(first.Equals(out));
3378         DCHECK(second.IsStackSlot());
3379         __ imull(first.AsRegister<CpuRegister>(),
3380                  Address(CpuRegister(RSP), second.GetStackIndex()));
3381       }
3382       break;
3383     case DataType::Type::kInt64: {
3384       // The constant may have ended up in a register, so test explicitly to avoid
3385       // problems where the output may not be the same as the first operand.
3386       if (mul->InputAt(1)->IsLongConstant()) {
3387         int64_t value = mul->InputAt(1)->AsLongConstant()->GetValue();
3388         if (IsInt<32>(value)) {
3389           __ imulq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(),
3390                    Immediate(static_cast<int32_t>(value)));
3391         } else {
3392           // Have to use the constant area.
3393           DCHECK(first.Equals(out));
3394           __ imulq(first.AsRegister<CpuRegister>(), codegen_->LiteralInt64Address(value));
3395         }
3396       } else if (second.IsRegister()) {
3397         DCHECK(first.Equals(out));
3398         __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3399       } else {
3400         DCHECK(second.IsDoubleStackSlot());
3401         DCHECK(first.Equals(out));
3402         __ imulq(first.AsRegister<CpuRegister>(),
3403                  Address(CpuRegister(RSP), second.GetStackIndex()));
3404       }
3405       break;
3406     }
3407 
3408     case DataType::Type::kFloat32: {
3409       DCHECK(first.Equals(out));
3410       if (second.IsFpuRegister()) {
3411         __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3412       } else if (second.IsConstant()) {
3413         __ mulss(first.AsFpuRegister<XmmRegister>(),
3414                  codegen_->LiteralFloatAddress(
3415                      second.GetConstant()->AsFloatConstant()->GetValue()));
3416       } else {
3417         DCHECK(second.IsStackSlot());
3418         __ mulss(first.AsFpuRegister<XmmRegister>(),
3419                  Address(CpuRegister(RSP), second.GetStackIndex()));
3420       }
3421       break;
3422     }
3423 
3424     case DataType::Type::kFloat64: {
3425       DCHECK(first.Equals(out));
3426       if (second.IsFpuRegister()) {
3427         __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3428       } else if (second.IsConstant()) {
3429         __ mulsd(first.AsFpuRegister<XmmRegister>(),
3430                  codegen_->LiteralDoubleAddress(
3431                      second.GetConstant()->AsDoubleConstant()->GetValue()));
3432       } else {
3433         DCHECK(second.IsDoubleStackSlot());
3434         __ mulsd(first.AsFpuRegister<XmmRegister>(),
3435                  Address(CpuRegister(RSP), second.GetStackIndex()));
3436       }
3437       break;
3438     }
3439 
3440     default:
3441       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3442   }
3443 }
3444 
3445 void InstructionCodeGeneratorX86_64::PushOntoFPStack(Location source, uint32_t temp_offset,
3446                                                      uint32_t stack_adjustment, bool is_float) {
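  // The x87 load instructions (flds/fldl) only take memory operands, so a source that
  // is not already on the stack is first written to the temporary slot at temp_offset.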
3447   if (source.IsStackSlot()) {
3448     DCHECK(is_float);
3449     __ flds(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
3450   } else if (source.IsDoubleStackSlot()) {
3451     DCHECK(!is_float);
3452     __ fldl(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
3453   } else {
3454     // Write the value to the temporary location on the stack and load to FP stack.
3455     if (is_float) {
3456       Location stack_temp = Location::StackSlot(temp_offset);
3457       codegen_->Move(stack_temp, source);
3458       __ flds(Address(CpuRegister(RSP), temp_offset));
3459     } else {
3460       Location stack_temp = Location::DoubleStackSlot(temp_offset);
3461       codegen_->Move(stack_temp, source);
3462       __ fldl(Address(CpuRegister(RSP), temp_offset));
3463     }
3464   }
3465 }
3466 
3467 void InstructionCodeGeneratorX86_64::GenerateRemFP(HRem *rem) {
3468   DataType::Type type = rem->GetResultType();
3469   bool is_float = type == DataType::Type::kFloat32;
3470   size_t elem_size = DataType::Size(type);
3471   LocationSummary* locations = rem->GetLocations();
3472   Location first = locations->InAt(0);
3473   Location second = locations->InAt(1);
3474   Location out = locations->Out();
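  // SSE has no remainder instruction, so the value is computed on the x87 stack. fprem
  // produces the truncation-style remainder that Java's '%' requires (fprem1 would
  // compute the round-to-nearest IEEE remainder instead).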
3475 
3476   // Create stack space for 2 elements.
3477   // TODO: enhance register allocator to ask for stack temporaries.
3478   __ subq(CpuRegister(RSP), Immediate(2 * elem_size));
3479 
3480   // Load the values to the FP stack in reverse order, using temporaries if needed.
3481   PushOntoFPStack(second, elem_size, 2 * elem_size, is_float);
3482   PushOntoFPStack(first, 0, 2 * elem_size, is_float);
3483 
3484   // Loop doing FPREM until we stabilize.
3485   NearLabel retry;
3486   __ Bind(&retry);
3487   __ fprem();
3488 
3489   // Move FP status to AX.
3490   __ fstsw();
3491 
3492   // And see if the argument reduction is complete. This is signaled by the
3493   // C2 FPU flag bit set to 0.
3494   __ andl(CpuRegister(RAX), Immediate(kC2ConditionMask));
3495   __ j(kNotEqual, &retry);
3496 
3497   // We have settled on the final value. Retrieve it into an XMM register.
3498   // Store FP top of stack to real stack.
3499   if (is_float) {
3500     __ fsts(Address(CpuRegister(RSP), 0));
3501   } else {
3502     __ fstl(Address(CpuRegister(RSP), 0));
3503   }
3504 
3505   // Pop the 2 items from the FP stack.
3506   __ fucompp();
3507 
3508   // Load the value from the stack into an XMM register.
3509   DCHECK(out.IsFpuRegister()) << out;
3510   if (is_float) {
3511     __ movss(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
3512   } else {
3513     __ movsd(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
3514   }
3515 
3516   // And remove the temporary stack space we allocated.
3517   __ addq(CpuRegister(RSP), Immediate(2 * elem_size));
3518 }
3519 
3520 void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
3521   DCHECK(instruction->IsDiv() || instruction->IsRem());
3522 
3523   LocationSummary* locations = instruction->GetLocations();
3524   Location second = locations->InAt(1);
3525   DCHECK(second.IsConstant());
3526 
3527   CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
3528   CpuRegister input_register = locations->InAt(0).AsRegister<CpuRegister>();
3529   int64_t imm = Int64FromConstant(second.GetConstant());
3530 
3531   DCHECK(imm == 1 || imm == -1);
3532 
3533   switch (instruction->GetResultType()) {
3534     case DataType::Type::kInt32: {
3535       if (instruction->IsRem()) {
3536         __ xorl(output_register, output_register);
3537       } else {
3538         __ movl(output_register, input_register);
3539         if (imm == -1) {
3540           __ negl(output_register);
3541         }
3542       }
3543       break;
3544     }
3545 
3546     case DataType::Type::kInt64: {
3547       if (instruction->IsRem()) {
3548         __ xorl(output_register, output_register);
3549       } else {
3550         __ movq(output_register, input_register);
3551         if (imm == -1) {
3552           __ negq(output_register);
3553         }
3554       }
3555       break;
3556     }
3557 
3558     default:
3559       LOG(FATAL) << "Unexpected type for div by (-)1 " << instruction->GetResultType();
3560   }
3561 }

void InstructionCodeGeneratorX86_64::RemByPowerOfTwo(HRem* instruction) {
3563   LocationSummary* locations = instruction->GetLocations();
3564   Location second = locations->InAt(1);
3565   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3566   CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
3567   int64_t imm = Int64FromConstant(second.GetConstant());
3568   DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
3569   uint64_t abs_imm = AbsOrMin(imm);
3570   CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
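  // The remainder keeps the sign of the numerator (Java semantics):
  // out = numerator & (abs_imm - 1), and if that is non-zero while the numerator is
  // negative, the result is corrected to out - abs_imm.
  // For example, -7 % 4: (-7 & 3) == 1, then 1 - 4 == -3.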
3571   if (instruction->GetResultType() == DataType::Type::kInt32) {
3572     NearLabel done;
3573     __ movl(out, numerator);
    __ andl(out, Immediate(abs_imm - 1));
    __ j(Condition::kZero, &done);
    __ leal(tmp, Address(out, static_cast<int32_t>(~(abs_imm - 1))));
    __ testl(numerator, numerator);
    __ cmov(Condition::kLess, out, tmp, false);
    __ Bind(&done);
3581   } else {
3582     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
3583     codegen_->Load64BitValue(tmp, abs_imm - 1);
3584     NearLabel done;
3585 
3586     __ movq(out, numerator);
3587     __ andq(out, tmp);
3588     __ j(Condition::kZero, &done);
3589     __ movq(tmp, numerator);
3590     __ sarq(tmp, Immediate(63));
3591     __ shlq(tmp, Immediate(WhichPowerOf2(abs_imm)));
3592     __ orq(out, tmp);
3593     __ Bind(&done);
3594   }
3595 }

void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
3597   LocationSummary* locations = instruction->GetLocations();
3598   Location second = locations->InAt(1);
3599 
3600   CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
3601   CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
3602 
3603   int64_t imm = Int64FromConstant(second.GetConstant());
3604   DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
3605   uint64_t abs_imm = AbsOrMin(imm);
3606 
3607   CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
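  // Signed division by a power of two truncates toward zero, so a bias of
  // (abs_imm - 1) is added for negative numerators before the arithmetic shift.
  // For example, -7 / 4: (-7 + 3) >> 2 == -1, whereas a plain -7 >> 2 would give -2.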
3608 
3609   if (instruction->GetResultType() == DataType::Type::kInt32) {
    // When the denominator is equal to 2, the bias is just the numerator's sign bit, which we
    // can add directly. Using addl below instead of cmov gives us a 1-cycle benefit.
3612     if (abs_imm == 2) {
3613       __ leal(tmp, Address(numerator, 0));
3614       __ shrl(tmp, Immediate(31));
3615       __ addl(tmp, numerator);
3616     } else {
3617       __ leal(tmp, Address(numerator, abs_imm - 1));
3618       __ testl(numerator, numerator);
3619       __ cmov(kGreaterEqual, tmp, numerator);
3620     }
3621     int shift = CTZ(imm);
3622     __ sarl(tmp, Immediate(shift));
3623 
3624     if (imm < 0) {
3625       __ negl(tmp);
3626     }
3627 
3628     __ movl(output_register, tmp);
3629   } else {
3630     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
3631     CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
3632     if (abs_imm == 2) {
3633       __ movq(rdx, numerator);
3634       __ shrq(rdx, Immediate(63));
3635       __ addq(rdx, numerator);
3636     } else {
3637       codegen_->Load64BitValue(rdx, abs_imm - 1);
3638       __ addq(rdx, numerator);
3639       __ testq(numerator, numerator);
3640       __ cmov(kGreaterEqual, rdx, numerator);
3641     }
3642     int shift = CTZ(imm);
3643     __ sarq(rdx, Immediate(shift));
3644 
3645     if (imm < 0) {
3646       __ negq(rdx);
3647     }
3648 
3649     __ movq(output_register, rdx);
3650   }
3651 }
3652 
3653 void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
3654   DCHECK(instruction->IsDiv() || instruction->IsRem());
3655 
3656   LocationSummary* locations = instruction->GetLocations();
3657   Location second = locations->InAt(1);
3658 
3659   CpuRegister numerator = instruction->IsDiv() ? locations->GetTemp(1).AsRegister<CpuRegister>()
3660       : locations->GetTemp(0).AsRegister<CpuRegister>();
3661   CpuRegister eax = locations->InAt(0).AsRegister<CpuRegister>();
3662   CpuRegister edx = instruction->IsDiv() ? locations->GetTemp(0).AsRegister<CpuRegister>()
3663       : locations->Out().AsRegister<CpuRegister>();
3664   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3665 
3666   DCHECK_EQ(RAX, eax.AsRegister());
3667   DCHECK_EQ(RDX, edx.AsRegister());
3668   if (instruction->IsDiv()) {
3669     DCHECK_EQ(RAX, out.AsRegister());
3670   } else {
3671     DCHECK_EQ(RDX, out.AsRegister());
3672   }
3673 
3674   int64_t magic;
3675   int shift;
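  // Division by an arbitrary constant is strength-reduced to a high multiply by a
  // precomputed fixed-point reciprocal ("magic" number) followed by shifts and a sign
  // correction, as in Hacker's Delight. For instance, for a 32-bit signed divisor of 7
  // the magic number is 0x92492493 with a shift of 2.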
3676 
3677   // TODO: can these branches be written as one?
3678   if (instruction->GetResultType() == DataType::Type::kInt32) {
3679     int imm = second.GetConstant()->AsIntConstant()->GetValue();
3680 
3681     CalculateMagicAndShiftForDivRem(imm, false /* is_long= */, &magic, &shift);
3682 
3683     __ movl(numerator, eax);
3684 
3685     __ movl(eax, Immediate(magic));
3686     __ imull(numerator);
3687 
3688     if (imm > 0 && magic < 0) {
3689       __ addl(edx, numerator);
3690     } else if (imm < 0 && magic > 0) {
3691       __ subl(edx, numerator);
3692     }
3693 
3694     if (shift != 0) {
3695       __ sarl(edx, Immediate(shift));
3696     }
3697 
3698     __ movl(eax, edx);
3699     __ shrl(edx, Immediate(31));
3700     __ addl(edx, eax);
3701 
3702     if (instruction->IsRem()) {
3703       __ movl(eax, numerator);
3704       __ imull(edx, Immediate(imm));
3705       __ subl(eax, edx);
3706       __ movl(edx, eax);
3707     } else {
3708       __ movl(eax, edx);
3709     }
3710   } else {
3711     int64_t imm = second.GetConstant()->AsLongConstant()->GetValue();
3712 
3713     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
3714 
3715     CpuRegister rax = eax;
3716     CpuRegister rdx = edx;
3717 
3718     CalculateMagicAndShiftForDivRem(imm, true /* is_long= */, &magic, &shift);
3719 
3720     // Save the numerator.
3721     __ movq(numerator, rax);
3722 
3723     // RAX = magic
3724     codegen_->Load64BitValue(rax, magic);
3725 
3726     // RDX:RAX = magic * numerator
3727     __ imulq(numerator);
3728 
3729     if (imm > 0 && magic < 0) {
3730       // RDX += numerator
3731       __ addq(rdx, numerator);
3732     } else if (imm < 0 && magic > 0) {
3733       // RDX -= numerator
3734       __ subq(rdx, numerator);
3735     }
3736 
3737     // Shift if needed.
3738     if (shift != 0) {
3739       __ sarq(rdx, Immediate(shift));
3740     }
3741 
3742     // RDX += 1 if RDX < 0
3743     __ movq(rax, rdx);
3744     __ shrq(rdx, Immediate(63));
3745     __ addq(rdx, rax);
3746 
3747     if (instruction->IsRem()) {
3748       __ movq(rax, numerator);
3749 
3750       if (IsInt<32>(imm)) {
3751         __ imulq(rdx, Immediate(static_cast<int32_t>(imm)));
3752       } else {
3753         __ imulq(rdx, codegen_->LiteralInt64Address(imm));
3754       }
3755 
3756       __ subq(rax, rdx);
3757       __ movq(rdx, rax);
3758     } else {
3759       __ movq(rax, rdx);
3760     }
3761   }
3762 }
3763 
3764 void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
3765   DCHECK(instruction->IsDiv() || instruction->IsRem());
3766   DataType::Type type = instruction->GetResultType();
3767   DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
3768 
3769   bool is_div = instruction->IsDiv();
3770   LocationSummary* locations = instruction->GetLocations();
3771 
3772   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3773   Location second = locations->InAt(1);
3774 
3775   DCHECK_EQ(RAX, locations->InAt(0).AsRegister<CpuRegister>().AsRegister());
3776   DCHECK_EQ(is_div ? RAX : RDX, out.AsRegister());
3777 
3778   if (second.IsConstant()) {
3779     int64_t imm = Int64FromConstant(second.GetConstant());
3780 
3781     if (imm == 0) {
      // Do not generate anything. DivZeroCheck would prevent any code from being executed.
3783     } else if (imm == 1 || imm == -1) {
3784       DivRemOneOrMinusOne(instruction);
3785     } else if (IsPowerOfTwo(AbsOrMin(imm))) {
3786       if (is_div) {
3787         DivByPowerOfTwo(instruction->AsDiv());
3788       } else {
3789         RemByPowerOfTwo(instruction->AsRem());
3790       }
3791     } else {
3792       DCHECK(imm <= -2 || imm >= 2);
3793       GenerateDivRemWithAnyConstant(instruction);
3794     }
3795   } else {
3796     SlowPathCode* slow_path =
3797         new (codegen_->GetScopedAllocator()) DivRemMinusOneSlowPathX86_64(
3798             instruction, out.AsRegister(), type, is_div);
3799     codegen_->AddSlowPath(slow_path);
3800 
3801     CpuRegister second_reg = second.AsRegister<CpuRegister>();
    // 0x80000000(00000000)/-1 triggers an arithmetic exception!
    // Dividing by -1 is actually negation, and -0x80000000(00000000) = 0x80000000(00000000),
    // so it is safe to simply negate instead of doing more complex comparisons.
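    // The slow path therefore handles divisor == -1 without running idiv: it negates
    // the numerator for a division and produces 0 for a remainder
    // (cf. DivRemMinusOneSlowPathX86_64).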
3805     if (type == DataType::Type::kInt32) {
3806       __ cmpl(second_reg, Immediate(-1));
3807       __ j(kEqual, slow_path->GetEntryLabel());
3808       // edx:eax <- sign-extended of eax
3809       __ cdq();
3810       // eax = quotient, edx = remainder
3811       __ idivl(second_reg);
3812     } else {
3813       __ cmpq(second_reg, Immediate(-1));
3814       __ j(kEqual, slow_path->GetEntryLabel());
3815       // rdx:rax <- sign-extended of rax
3816       __ cqo();
3817       // rax = quotient, rdx = remainder
3818       __ idivq(second_reg);
3819     }
3820     __ Bind(slow_path->GetExitLabel());
3821   }
3822 }
3823 
3824 void LocationsBuilderX86_64::VisitDiv(HDiv* div) {
3825   LocationSummary* locations =
3826       new (GetGraph()->GetAllocator()) LocationSummary(div, LocationSummary::kNoCall);
3827   switch (div->GetResultType()) {
3828     case DataType::Type::kInt32:
3829     case DataType::Type::kInt64: {
3830       locations->SetInAt(0, Location::RegisterLocation(RAX));
3831       locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
3832       locations->SetOut(Location::SameAsFirstInput());
      // Intel uses edx:eax (rdx:rax for long) as the dividend.
      locations->AddTemp(Location::RegisterLocation(RDX));
      // We need to save the numerator while we tweak RAX and RDX. As we are using imul in a way
      // which enforces results to be in RAX and RDX, things are simpler if we use RAX also as
      // the output and request another temp.
3838       if (div->InputAt(1)->IsConstant()) {
3839         locations->AddTemp(Location::RequiresRegister());
3840       }
3841       break;
3842     }
3843 
3844     case DataType::Type::kFloat32:
3845     case DataType::Type::kFloat64: {
3846       locations->SetInAt(0, Location::RequiresFpuRegister());
3847       locations->SetInAt(1, Location::Any());
3848       locations->SetOut(Location::SameAsFirstInput());
3849       break;
3850     }
3851 
3852     default:
3853       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3854   }
3855 }
3856 
3857 void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) {
3858   LocationSummary* locations = div->GetLocations();
3859   Location first = locations->InAt(0);
3860   Location second = locations->InAt(1);
3861   DCHECK(first.Equals(locations->Out()));
3862 
3863   DataType::Type type = div->GetResultType();
3864   switch (type) {
3865     case DataType::Type::kInt32:
3866     case DataType::Type::kInt64: {
3867       GenerateDivRemIntegral(div);
3868       break;
3869     }
3870 
3871     case DataType::Type::kFloat32: {
3872       if (second.IsFpuRegister()) {
3873         __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3874       } else if (second.IsConstant()) {
3875         __ divss(first.AsFpuRegister<XmmRegister>(),
3876                  codegen_->LiteralFloatAddress(
3877                      second.GetConstant()->AsFloatConstant()->GetValue()));
3878       } else {
3879         DCHECK(second.IsStackSlot());
3880         __ divss(first.AsFpuRegister<XmmRegister>(),
3881                  Address(CpuRegister(RSP), second.GetStackIndex()));
3882       }
3883       break;
3884     }
3885 
3886     case DataType::Type::kFloat64: {
3887       if (second.IsFpuRegister()) {
3888         __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3889       } else if (second.IsConstant()) {
3890         __ divsd(first.AsFpuRegister<XmmRegister>(),
3891                  codegen_->LiteralDoubleAddress(
3892                      second.GetConstant()->AsDoubleConstant()->GetValue()));
3893       } else {
3894         DCHECK(second.IsDoubleStackSlot());
3895         __ divsd(first.AsFpuRegister<XmmRegister>(),
3896                  Address(CpuRegister(RSP), second.GetStackIndex()));
3897       }
3898       break;
3899     }
3900 
3901     default:
3902       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3903   }
3904 }
3905 
3906 void LocationsBuilderX86_64::VisitRem(HRem* rem) {
3907   DataType::Type type = rem->GetResultType();
3908   LocationSummary* locations =
3909     new (GetGraph()->GetAllocator()) LocationSummary(rem, LocationSummary::kNoCall);
3910 
3911   switch (type) {
3912     case DataType::Type::kInt32:
3913     case DataType::Type::kInt64: {
3914       locations->SetInAt(0, Location::RegisterLocation(RAX));
3915       locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
      // Intel uses rdx:rax as the dividend and puts the remainder in rdx.
      locations->SetOut(Location::RegisterLocation(RDX));
      // We need to save the numerator while we tweak RAX and RDX. As we are using imul in a way
      // which enforces results to be in RAX and RDX, things are simpler if we use RDX also as
      // the output and request another temp.
3921       if (rem->InputAt(1)->IsConstant()) {
3922         locations->AddTemp(Location::RequiresRegister());
3923       }
3924       break;
3925     }
3926 
3927     case DataType::Type::kFloat32:
3928     case DataType::Type::kFloat64: {
3929       locations->SetInAt(0, Location::Any());
3930       locations->SetInAt(1, Location::Any());
3931       locations->SetOut(Location::RequiresFpuRegister());
3932       locations->AddTemp(Location::RegisterLocation(RAX));
3933       break;
3934     }
3935 
3936     default:
3937       LOG(FATAL) << "Unexpected rem type " << type;
3938   }
3939 }
3940 
3941 void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) {
3942   DataType::Type type = rem->GetResultType();
3943   switch (type) {
3944     case DataType::Type::kInt32:
3945     case DataType::Type::kInt64: {
3946       GenerateDivRemIntegral(rem);
3947       break;
3948     }
3949     case DataType::Type::kFloat32:
3950     case DataType::Type::kFloat64: {
3951       GenerateRemFP(rem);
3952       break;
3953     }
3954     default:
3955       LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
3956   }
3957 }
3958 
3959 static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
3960   LocationSummary* locations = new (allocator) LocationSummary(minmax);
3961   switch (minmax->GetResultType()) {
3962     case DataType::Type::kInt32:
3963     case DataType::Type::kInt64:
3964       locations->SetInAt(0, Location::RequiresRegister());
3965       locations->SetInAt(1, Location::RequiresRegister());
3966       locations->SetOut(Location::SameAsFirstInput());
3967       break;
3968     case DataType::Type::kFloat32:
3969     case DataType::Type::kFloat64:
3970       locations->SetInAt(0, Location::RequiresFpuRegister());
3971       locations->SetInAt(1, Location::RequiresFpuRegister());
3972       // The following is sub-optimal, but it is all we can do for now. It would be fine to also
3973       // accept the second input as the output (we could simply swap the inputs).
3974       locations->SetOut(Location::SameAsFirstInput());
3975       break;
3976     default:
3977       LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
3978   }
3979 }
3980 
3981 void InstructionCodeGeneratorX86_64::GenerateMinMaxInt(LocationSummary* locations,
3982                                                        bool is_min,
3983                                                        DataType::Type type) {
3984   Location op1_loc = locations->InAt(0);
3985   Location op2_loc = locations->InAt(1);
3986 
3987   // Shortcut for same input locations.
3988   if (op1_loc.Equals(op2_loc)) {
3989     // Can return immediately, as op1_loc == out_loc.
3990     // Note: if we ever support separate registers, e.g., output into memory, we need to check for
3991     //       a copy here.
3992     DCHECK(locations->Out().Equals(op1_loc));
3993     return;
3994   }
3995 
3996   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3997   CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();
3998 
3999   //  (out := op1)
4000   //  out <=? op2
4001   //  if out is min jmp done
4002   //  out := op2
4003   // done:
4004 
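  // cmov copies op2 into out only when out currently holds the wrong value (it is greater than
  // op2 for min, less than op2 for max), so the sequence is branch-free.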
4005   if (type == DataType::Type::kInt64) {
4006     __ cmpq(out, op2);
4007     __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /* is64bit= */ true);
4008   } else {
4009     DCHECK_EQ(type, DataType::Type::kInt32);
4010     __ cmpl(out, op2);
4011     __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /* is64bit= */ false);
4012   }
4013 }
4014 
4015 void InstructionCodeGeneratorX86_64::GenerateMinMaxFP(LocationSummary* locations,
4016                                                       bool is_min,
4017                                                       DataType::Type type) {
4018   Location op1_loc = locations->InAt(0);
4019   Location op2_loc = locations->InAt(1);
4020   Location out_loc = locations->Out();
4021   XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
4022 
4023   // Shortcut for same input locations.
4024   if (op1_loc.Equals(op2_loc)) {
4025     DCHECK(out_loc.Equals(op1_loc));
4026     return;
4027   }
4028 
4029   //  (out := op1)
4030   //  out <=? op2
4031   //  if NaN jmp NaN_label
4032   //  if out is min jmp done
4033   //  if op2 is min jmp op2_label
4034   //  handle -0/+0
4035   //  jmp done
4036   // NaN_label:
4037   //  out := NaN
4038   // op2_label:
4039   //  out := op2
4040   // done:
4041   //
4042   // This removes one jmp, but needs to copy one input (op1) to out.
4043   //
4044   // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?
4045 
4046   XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
4047 
4048   NearLabel nan, done, op2_label;
4049   if (type == DataType::Type::kFloat64) {
4050     __ ucomisd(out, op2);
4051   } else {
4052     DCHECK_EQ(type, DataType::Type::kFloat32);
4053     __ ucomiss(out, op2);
4054   }
4055 
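  // ucomiss/ucomisd set the parity flag when the comparison is unordered, i.e. when either
  // operand is NaN.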
4056   __ j(Condition::kParityEven, &nan);
4057 
4058   __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
4059   __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
4060 
4061   // Handle 0.0/-0.0.
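  // At this point the operands compared equal, so they are either identical or +0.0 and -0.0.
  // ORing the sign bits makes min produce -0.0, while ANDing them makes max produce +0.0.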
4062   if (is_min) {
4063     if (type == DataType::Type::kFloat64) {
4064       __ orpd(out, op2);
4065     } else {
4066       __ orps(out, op2);
4067     }
4068   } else {
4069     if (type == DataType::Type::kFloat64) {
4070       __ andpd(out, op2);
4071     } else {
4072       __ andps(out, op2);
4073     }
4074   }
4075   __ jmp(&done);
4076 
4077   // NaN handling.
4078   __ Bind(&nan);
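  // Return the canonical quiet NaN bit pattern for the type.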
4079   if (type == DataType::Type::kFloat64) {
4080     __ movsd(out, codegen_->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
4081   } else {
4082     __ movss(out, codegen_->LiteralInt32Address(INT32_C(0x7FC00000)));
4083   }
4084   __ jmp(&done);
4085 
4086   // out := op2;
4087   __ Bind(&op2_label);
4088   if (type == DataType::Type::kFloat64) {
4089     __ movsd(out, op2);
4090   } else {
4091     __ movss(out, op2);
4092   }
4093 
4094   // Done.
4095   __ Bind(&done);
4096 }
4097 
4098 void InstructionCodeGeneratorX86_64::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
4099   DataType::Type type = minmax->GetResultType();
4100   switch (type) {
4101     case DataType::Type::kInt32:
4102     case DataType::Type::kInt64:
4103       GenerateMinMaxInt(minmax->GetLocations(), is_min, type);
4104       break;
4105     case DataType::Type::kFloat32:
4106     case DataType::Type::kFloat64:
4107       GenerateMinMaxFP(minmax->GetLocations(), is_min, type);
4108       break;
4109     default:
4110       LOG(FATAL) << "Unexpected type for HMinMax " << type;
4111   }
4112 }
4113 
4114 void LocationsBuilderX86_64::VisitMin(HMin* min) {
4115   CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
4116 }
4117 
4118 void InstructionCodeGeneratorX86_64::VisitMin(HMin* min) {
4119   GenerateMinMax(min, /* is_min= */ true);
4120 }
4121 
4122 void LocationsBuilderX86_64::VisitMax(HMax* max) {
4123   CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
4124 }
4125 
4126 void InstructionCodeGeneratorX86_64::VisitMax(HMax* max) {
4127   GenerateMinMax(max, /* is_min= */ false);
4128 }
4129 
4130 void LocationsBuilderX86_64::VisitAbs(HAbs* abs) {
4131   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
4132   switch (abs->GetResultType()) {
4133     case DataType::Type::kInt32:
4134     case DataType::Type::kInt64:
4135       locations->SetInAt(0, Location::RequiresRegister());
4136       locations->SetOut(Location::SameAsFirstInput());
4137       locations->AddTemp(Location::RequiresRegister());
4138       break;
4139     case DataType::Type::kFloat32:
4140     case DataType::Type::kFloat64:
4141       locations->SetInAt(0, Location::RequiresFpuRegister());
4142       locations->SetOut(Location::SameAsFirstInput());
4143       locations->AddTemp(Location::RequiresFpuRegister());
4144       break;
4145     default:
4146       LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4147   }
4148 }
4149 
4150 void InstructionCodeGeneratorX86_64::VisitAbs(HAbs* abs) {
4151   LocationSummary* locations = abs->GetLocations();
4152   switch (abs->GetResultType()) {
4153     case DataType::Type::kInt32: {
4154       CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4155       CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();
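      // Branchless abs: mask = x >> 31 is 0 for non-negative x and -1 for negative x, so
      // (x + mask) ^ mask leaves non-negative values unchanged and negates negative ones.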
4156       // Create mask.
4157       __ movl(mask, out);
4158       __ sarl(mask, Immediate(31));
4159       // Add mask.
4160       __ addl(out, mask);
4161       __ xorl(out, mask);
4162       break;
4163     }
4164     case DataType::Type::kInt64: {
4165       CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4166       CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();
4167       // Create mask.
4168       __ movq(mask, out);
4169       __ sarq(mask, Immediate(63));
4170       // Add mask.
4171       __ addq(out, mask);
4172       __ xorq(out, mask);
4173       break;
4174     }
4175     case DataType::Type::kFloat32: {
4176       XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4177       XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
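      // Clear the IEEE-754 sign bit: 0x7FFFFFFF keeps only the exponent and mantissa bits.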
4178       __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
4179       __ andps(out, mask);
4180       break;
4181     }
4182     case DataType::Type::kFloat64: {
4183       XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4184       XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4185       __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
4186       __ andpd(out, mask);
4187       break;
4188     }
4189     default:
4190       LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4191   }
4192 }
4193 
4194 void LocationsBuilderX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4195   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
4196   locations->SetInAt(0, Location::Any());
4197 }
4198 
4199 void InstructionCodeGeneratorX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4200   SlowPathCode* slow_path =
4201       new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathX86_64(instruction);
4202   codegen_->AddSlowPath(slow_path);
4203 
4204   LocationSummary* locations = instruction->GetLocations();
4205   Location value = locations->InAt(0);
4206 
4207   switch (instruction->GetType()) {
4208     case DataType::Type::kBool:
4209     case DataType::Type::kUint8:
4210     case DataType::Type::kInt8:
4211     case DataType::Type::kUint16:
4212     case DataType::Type::kInt16:
4213     case DataType::Type::kInt32: {
4214       if (value.IsRegister()) {
4215         __ testl(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
4216         __ j(kEqual, slow_path->GetEntryLabel());
4217       } else if (value.IsStackSlot()) {
4218         __ cmpl(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
4219         __ j(kEqual, slow_path->GetEntryLabel());
4220       } else {
4221         DCHECK(value.IsConstant()) << value;
4222         if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
4223           __ jmp(slow_path->GetEntryLabel());
4224         }
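        // A non-zero constant divisor can never throw, so no code is emitted here.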
4225       }
4226       break;
4227     }
4228     case DataType::Type::kInt64: {
4229       if (value.IsRegister()) {
4230         __ testq(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
4231         __ j(kEqual, slow_path->GetEntryLabel());
4232       } else if (value.IsDoubleStackSlot()) {
4233         __ cmpq(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
4234         __ j(kEqual, slow_path->GetEntryLabel());
4235       } else {
4236         DCHECK(value.IsConstant()) << value;
4237         if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
4238           __ jmp(slow_path->GetEntryLabel());
4239         }
4240       }
4241       break;
4242     }
4243     default:
4244       LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
4245   }
4246 }
4247 
4248 void LocationsBuilderX86_64::HandleShift(HBinaryOperation* op) {
4249   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4250 
4251   LocationSummary* locations =
4252       new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall);
4253 
4254   switch (op->GetResultType()) {
4255     case DataType::Type::kInt32:
4256     case DataType::Type::kInt64: {
4257       locations->SetInAt(0, Location::RequiresRegister());
4258       // The shift count needs to be in CL (unless it is a constant).
4259       locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, op->InputAt(1)));
4260       locations->SetOut(Location::SameAsFirstInput());
4261       break;
4262     }
4263     default:
4264       LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
4265   }
4266 }
4267 
4268 void InstructionCodeGeneratorX86_64::HandleShift(HBinaryOperation* op) {
4269   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4270 
4271   LocationSummary* locations = op->GetLocations();
4272   CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
4273   Location second = locations->InAt(1);
4274 
4275   switch (op->GetResultType()) {
4276     case DataType::Type::kInt32: {
4277       if (second.IsRegister()) {
4278         CpuRegister second_reg = second.AsRegister<CpuRegister>();
4279         if (op->IsShl()) {
4280           __ shll(first_reg, second_reg);
4281         } else if (op->IsShr()) {
4282           __ sarl(first_reg, second_reg);
4283         } else {
4284           __ shrl(first_reg, second_reg);
4285         }
4286       } else {
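        // Mask the constant shift count to [0, 31], matching Java shift semantics (register
        // counts are masked the same way by the hardware).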
4287         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
4288         if (op->IsShl()) {
4289           __ shll(first_reg, imm);
4290         } else if (op->IsShr()) {
4291           __ sarl(first_reg, imm);
4292         } else {
4293           __ shrl(first_reg, imm);
4294         }
4295       }
4296       break;
4297     }
4298     case DataType::Type::kInt64: {
4299       if (second.IsRegister()) {
4300         CpuRegister second_reg = second.AsRegister<CpuRegister>();
4301         if (op->IsShl()) {
4302           __ shlq(first_reg, second_reg);
4303         } else if (op->IsShr()) {
4304           __ sarq(first_reg, second_reg);
4305         } else {
4306           __ shrq(first_reg, second_reg);
4307         }
4308       } else {
4309         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
4310         if (op->IsShl()) {
4311           __ shlq(first_reg, imm);
4312         } else if (op->IsShr()) {
4313           __ sarq(first_reg, imm);
4314         } else {
4315           __ shrq(first_reg, imm);
4316         }
4317       }
4318       break;
4319     }
4320     default:
4321       LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
4322       UNREACHABLE();
4323   }
4324 }
4325 
4326 void LocationsBuilderX86_64::VisitRor(HRor* ror) {
4327   LocationSummary* locations =
4328       new (GetGraph()->GetAllocator()) LocationSummary(ror, LocationSummary::kNoCall);
4329 
4330   switch (ror->GetResultType()) {
4331     case DataType::Type::kInt32:
4332     case DataType::Type::kInt64: {
4333       locations->SetInAt(0, Location::RequiresRegister());
4334       // The shift count needs to be in CL (unless it is a constant).
4335       locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, ror->InputAt(1)));
4336       locations->SetOut(Location::SameAsFirstInput());
4337       break;
4338     }
4339     default:
4340       LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
4341       UNREACHABLE();
4342   }
4343 }
4344 
4345 void InstructionCodeGeneratorX86_64::VisitRor(HRor* ror) {
4346   LocationSummary* locations = ror->GetLocations();
4347   CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
4348   Location second = locations->InAt(1);
4349 
4350   switch (ror->GetResultType()) {
4351     case DataType::Type::kInt32:
4352       if (second.IsRegister()) {
4353         CpuRegister second_reg = second.AsRegister<CpuRegister>();
4354         __ rorl(first_reg, second_reg);
4355       } else {
4356         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
4357         __ rorl(first_reg, imm);
4358       }
4359       break;
4360     case DataType::Type::kInt64:
4361       if (second.IsRegister()) {
4362         CpuRegister second_reg = second.AsRegister<CpuRegister>();
4363         __ rorq(first_reg, second_reg);
4364       } else {
4365         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
4366         __ rorq(first_reg, imm);
4367       }
4368       break;
4369     default:
4370       LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
4371       UNREACHABLE();
4372   }
4373 }
4374 
4375 void LocationsBuilderX86_64::VisitShl(HShl* shl) {
4376   HandleShift(shl);
4377 }
4378 
4379 void InstructionCodeGeneratorX86_64::VisitShl(HShl* shl) {
4380   HandleShift(shl);
4381 }
4382 
4383 void LocationsBuilderX86_64::VisitShr(HShr* shr) {
4384   HandleShift(shr);
4385 }
4386 
4387 void InstructionCodeGeneratorX86_64::VisitShr(HShr* shr) {
4388   HandleShift(shr);
4389 }
4390 
4391 void LocationsBuilderX86_64::VisitUShr(HUShr* ushr) {
4392   HandleShift(ushr);
4393 }
4394 
4395 void InstructionCodeGeneratorX86_64::VisitUShr(HUShr* ushr) {
4396   HandleShift(ushr);
4397 }
4398 
4399 void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
4400   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
4401       instruction, LocationSummary::kCallOnMainOnly);
4402   InvokeRuntimeCallingConvention calling_convention;
4403   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
4404   locations->SetOut(Location::RegisterLocation(RAX));
4405 }
4406 
4407 void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) {
4408   codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
4409   CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
4410   DCHECK(!codegen_->IsLeafMethod());
4411 }
4412 
4413 void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) {
4414   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
4415       instruction, LocationSummary::kCallOnMainOnly);
4416   InvokeRuntimeCallingConvention calling_convention;
4417   locations->SetOut(Location::RegisterLocation(RAX));
4418   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
4419   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
4420 }
4421 
4422 void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) {
4423   // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
4424   QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
4425   codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
4426   CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
4427   DCHECK(!codegen_->IsLeafMethod());
4428 }
4429 
4430 void LocationsBuilderX86_64::VisitParameterValue(HParameterValue* instruction) {
4431   LocationSummary* locations =
4432       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4433   Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
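  // Stack-passed arguments live in the caller's frame; rebase their offsets by this method's
  // frame size so that they can be addressed relative to the current stack pointer.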
4434   if (location.IsStackSlot()) {
4435     location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
4436   } else if (location.IsDoubleStackSlot()) {
4437     location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
4438   }
4439   locations->SetOut(location);
4440 }
4441 
4442 void InstructionCodeGeneratorX86_64::VisitParameterValue(
4443     HParameterValue* instruction ATTRIBUTE_UNUSED) {
4444   // Nothing to do, the parameter is already at its location.
4445 }
4446 
4447 void LocationsBuilderX86_64::VisitCurrentMethod(HCurrentMethod* instruction) {
4448   LocationSummary* locations =
4449       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4450   locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument));
4451 }
4452 
4453 void InstructionCodeGeneratorX86_64::VisitCurrentMethod(
4454     HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
4455   // Nothing to do, the method is already at its location.
4456 }
4457 
4458 void LocationsBuilderX86_64::VisitClassTableGet(HClassTableGet* instruction) {
4459   LocationSummary* locations =
4460       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4461   locations->SetInAt(0, Location::RequiresRegister());
4462   locations->SetOut(Location::RequiresRegister());
4463 }
4464 
4465 void InstructionCodeGeneratorX86_64::VisitClassTableGet(HClassTableGet* instruction) {
4466   LocationSummary* locations = instruction->GetLocations();
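  // Vtable entries are embedded directly in the class object, so a single load suffices. IMT
  // entries require loading the ImTable pointer first and then indexing into it.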
4467   if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
4468     uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
4469         instruction->GetIndex(), kX86_64PointerSize).SizeValue();
4470     __ movq(locations->Out().AsRegister<CpuRegister>(),
4471             Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset));
4472   } else {
4473     uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
4474         instruction->GetIndex(), kX86_64PointerSize));
4475     __ movq(locations->Out().AsRegister<CpuRegister>(),
4476             Address(locations->InAt(0).AsRegister<CpuRegister>(),
4477                     mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
4478     __ movq(locations->Out().AsRegister<CpuRegister>(),
4479             Address(locations->Out().AsRegister<CpuRegister>(), method_offset));
4480   }
4481 }
4482 
4483 void LocationsBuilderX86_64::VisitNot(HNot* not_) {
4484   LocationSummary* locations =
4485       new (GetGraph()->GetAllocator()) LocationSummary(not_, LocationSummary::kNoCall);
4486   locations->SetInAt(0, Location::RequiresRegister());
4487   locations->SetOut(Location::SameAsFirstInput());
4488 }
4489 
4490 void InstructionCodeGeneratorX86_64::VisitNot(HNot* not_) {
4491   LocationSummary* locations = not_->GetLocations();
4492   DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
4493             locations->Out().AsRegister<CpuRegister>().AsRegister());
4494   Location out = locations->Out();
4495   switch (not_->GetResultType()) {
4496     case DataType::Type::kInt32:
4497       __ notl(out.AsRegister<CpuRegister>());
4498       break;
4499 
4500     case DataType::Type::kInt64:
4501       __ notq(out.AsRegister<CpuRegister>());
4502       break;
4503 
4504     default:
4505       LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
4506   }
4507 }
4508 
4509 void LocationsBuilderX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
4510   LocationSummary* locations =
4511       new (GetGraph()->GetAllocator()) LocationSummary(bool_not, LocationSummary::kNoCall);
4512   locations->SetInAt(0, Location::RequiresRegister());
4513   locations->SetOut(Location::SameAsFirstInput());
4514 }
4515 
4516 void InstructionCodeGeneratorX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
4517   LocationSummary* locations = bool_not->GetLocations();
4518   DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
4519             locations->Out().AsRegister<CpuRegister>().AsRegister());
4520   Location out = locations->Out();
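  // A boolean is always 0 or 1, so flipping the least significant bit negates it.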
4521   __ xorl(out.AsRegister<CpuRegister>(), Immediate(1));
4522 }
4523 
4524 void LocationsBuilderX86_64::VisitPhi(HPhi* instruction) {
4525   LocationSummary* locations =
4526       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4527   for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
4528     locations->SetInAt(i, Location::Any());
4529   }
4530   locations->SetOut(Location::Any());
4531 }
4532 
4533 void InstructionCodeGeneratorX86_64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
4534   LOG(FATAL) << "Unimplemented";
4535 }
4536 
4537 void CodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) {
4538   /*
4539    * According to the JSR-133 Cookbook, on x86-64 only StoreLoad/AnyAny barriers need a memory
4540    * fence. All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86-64 memory
4541    * model. For those cases, all we need to ensure is that there is a scheduling barrier in place.
4542    */
4543   switch (kind) {
4544     case MemBarrierKind::kAnyAny: {
4545       MemoryFence();
4546       break;
4547     }
4548     case MemBarrierKind::kAnyStore:
4549     case MemBarrierKind::kLoadAny:
4550     case MemBarrierKind::kStoreStore: {
4551       // nop
4552       break;
4553     }
4554     case MemBarrierKind::kNTStoreStore:
4555       // Non-Temporal Store/Store needs an explicit fence.
4556       MemoryFence(/* non-temporal= */ true);
4557       break;
4558   }
4559 }
4560 
4561 void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) {
4562   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
4563 
4564   bool object_field_get_with_read_barrier =
4565       kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
4566   LocationSummary* locations =
4567       new (GetGraph()->GetAllocator()) LocationSummary(instruction,
4568                                                        object_field_get_with_read_barrier
4569                                                            ? LocationSummary::kCallOnSlowPath
4570                                                            : LocationSummary::kNoCall);
4571   if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
4572     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
4573   }
4574   locations->SetInAt(0, Location::RequiresRegister());
4575   if (DataType::IsFloatingPointType(instruction->GetType())) {
4576     locations->SetOut(Location::RequiresFpuRegister());
4577   } else {
4578     // The output overlaps for an object field get when read barriers
4579     // are enabled: we do not want the move to overwrite the object's
4580     // location, as we need it to emit the read barrier.
4581     locations->SetOut(
4582         Location::RequiresRegister(),
4583         object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
4584   }
4585 }
4586 
4587 void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
4588                                                     const FieldInfo& field_info) {
4589   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
4590 
4591   LocationSummary* locations = instruction->GetLocations();
4592   Location base_loc = locations->InAt(0);
4593   CpuRegister base = base_loc.AsRegister<CpuRegister>();
4594   Location out = locations->Out();
4595   bool is_volatile = field_info.IsVolatile();
4596   DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
4597   DataType::Type load_type = instruction->GetType();
4598   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
4599 
4600   switch (load_type) {
4601     case DataType::Type::kBool:
4602     case DataType::Type::kUint8: {
4603       __ movzxb(out.AsRegister<CpuRegister>(), Address(base, offset));
4604       break;
4605     }
4606 
4607     case DataType::Type::kInt8: {
4608       __ movsxb(out.AsRegister<CpuRegister>(), Address(base, offset));
4609       break;
4610     }
4611 
4612     case DataType::Type::kUint16: {
4613       __ movzxw(out.AsRegister<CpuRegister>(), Address(base, offset));
4614       break;
4615     }
4616 
4617     case DataType::Type::kInt16: {
4618       __ movsxw(out.AsRegister<CpuRegister>(), Address(base, offset));
4619       break;
4620     }
4621 
4622     case DataType::Type::kInt32: {
4623       __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
4624       break;
4625     }
4626 
4627     case DataType::Type::kReference: {
4628       // /* HeapReference<Object> */ out = *(base + offset)
4629       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
4630         // Note that a potential implicit null check is handled in this
4631         // CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call.
4632         codegen_->GenerateFieldLoadWithBakerReadBarrier(
4633             instruction, out, base, offset, /* needs_null_check= */ true);
4634         if (is_volatile) {
4635           codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4636         }
4637       } else {
4638         __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
4639         codegen_->MaybeRecordImplicitNullCheck(instruction);
4640         if (is_volatile) {
4641           codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4642         }
4643         // If read barriers are enabled, emit read barriers other than
4644         // Baker's using a slow path (and also unpoison the loaded
4645         // reference, if heap poisoning is enabled).
4646         codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
4647       }
4648       break;
4649     }
4650 
4651     case DataType::Type::kInt64: {
4652       __ movq(out.AsRegister<CpuRegister>(), Address(base, offset));
4653       break;
4654     }
4655 
4656     case DataType::Type::kFloat32: {
4657       __ movss(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
4658       break;
4659     }
4660 
4661     case DataType::Type::kFloat64: {
4662       __ movsd(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
4663       break;
4664     }
4665 
4666     case DataType::Type::kUint32:
4667     case DataType::Type::kUint64:
4668     case DataType::Type::kVoid:
4669       LOG(FATAL) << "Unreachable type " << load_type;
4670       UNREACHABLE();
4671   }
4672 
4673   if (load_type == DataType::Type::kReference) {
4674     // Potential implicit null checks, in the case of reference
4675     // fields, are handled in the previous switch statement.
4676   } else {
4677     codegen_->MaybeRecordImplicitNullCheck(instruction);
4678   }
4679 
4680   if (is_volatile) {
4681     if (load_type == DataType::Type::kReference) {
4682       // Memory barriers, in the case of references, are also handled
4683       // in the previous switch statement.
4684     } else {
4685       codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4686     }
4687   }
4688 }
4689 
4690 void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction,
4691                                             const FieldInfo& field_info) {
4692   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
4693 
4694   LocationSummary* locations =
4695       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4696   DataType::Type field_type = field_info.GetFieldType();
4697   bool is_volatile = field_info.IsVolatile();
4698   bool needs_write_barrier =
4699       CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
4700 
4701   locations->SetInAt(0, Location::RequiresRegister());
4702   if (DataType::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
4703     if (is_volatile) {
4704       // In order to satisfy the semantics of volatile, this must be a single instruction store.
4705       locations->SetInAt(1, Location::FpuRegisterOrInt32Constant(instruction->InputAt(1)));
4706     } else {
4707       locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
4708     }
4709   } else {
4710     if (is_volatile) {
4711       // In order to satisfy the semantics of volatile, this must be a single instruction store.
4712       locations->SetInAt(1, Location::RegisterOrInt32Constant(instruction->InputAt(1)));
4713     } else {
4714       locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
4715     }
4716   }
4717   if (needs_write_barrier) {
4718     // Temporary registers for the write barrier.
4719     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
4720     locations->AddTemp(Location::RequiresRegister());
4721   } else if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
4722     // Temporary register for the reference poisoning.
4723     locations->AddTemp(Location::RequiresRegister());
4724   }
4725 }
4726 
4727 void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
4728                                                     const FieldInfo& field_info,
4729                                                     bool value_can_be_null) {
4730   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
4731 
4732   LocationSummary* locations = instruction->GetLocations();
4733   CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>();
4734   Location value = locations->InAt(1);
4735   bool is_volatile = field_info.IsVolatile();
4736   DataType::Type field_type = field_info.GetFieldType();
4737   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
4738 
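  // A volatile store is bracketed by an AnyStore barrier before and an AnyAny barrier after;
  // on x86-64 only the trailing AnyAny barrier emits an actual fence.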
4739   if (is_volatile) {
4740     codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
4741   }
4742 
4743   bool maybe_record_implicit_null_check_done = false;
4744 
4745   switch (field_type) {
4746     case DataType::Type::kBool:
4747     case DataType::Type::kUint8:
4748     case DataType::Type::kInt8: {
4749       if (value.IsConstant()) {
4750         __ movb(Address(base, offset),
4751                 Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
4752       } else {
4753         __ movb(Address(base, offset), value.AsRegister<CpuRegister>());
4754       }
4755       break;
4756     }
4757 
4758     case DataType::Type::kUint16:
4759     case DataType::Type::kInt16: {
4760       if (value.IsConstant()) {
4761         __ movw(Address(base, offset),
4762                 Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
4763       } else {
4764         __ movw(Address(base, offset), value.AsRegister<CpuRegister>());
4765       }
4766       break;
4767     }
4768 
4769     case DataType::Type::kInt32:
4770     case DataType::Type::kReference: {
4771       if (value.IsConstant()) {
4772         int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
4773         // `field_type == DataType::Type::kReference` implies `v == 0`.
4774         DCHECK((field_type != DataType::Type::kReference) || (v == 0));
4775         // Note: if heap poisoning is enabled, no need to poison
4776         // (negate) `v` if it is a reference, as it would be null.
4777         __ movl(Address(base, offset), Immediate(v));
4778       } else {
4779         if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
4780           CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
4781           __ movl(temp, value.AsRegister<CpuRegister>());
4782           __ PoisonHeapReference(temp);
4783           __ movl(Address(base, offset), temp);
4784         } else {
4785           __ movl(Address(base, offset), value.AsRegister<CpuRegister>());
4786         }
4787       }
4788       break;
4789     }
4790 
4791     case DataType::Type::kInt64: {
4792       if (value.IsConstant()) {
4793         int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
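        // A 64-bit immediate may not be encodable in a single store; the helper splits it into
        // two 32-bit stores when necessary and records the implicit null check itself.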
4794         codegen_->MoveInt64ToAddress(Address(base, offset),
4795                                      Address(base, offset + sizeof(int32_t)),
4796                                      v,
4797                                      instruction);
4798         maybe_record_implicit_null_check_done = true;
4799       } else {
4800         __ movq(Address(base, offset), value.AsRegister<CpuRegister>());
4801       }
4802       break;
4803     }
4804 
4805     case DataType::Type::kFloat32: {
4806       if (value.IsConstant()) {
4807         int32_t v =
4808             bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
4809         __ movl(Address(base, offset), Immediate(v));
4810       } else {
4811         __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>());
4812       }
4813       break;
4814     }
4815 
4816     case DataType::Type::kFloat64: {
4817       if (value.IsConstant()) {
4818         int64_t v =
4819             bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
4820         codegen_->MoveInt64ToAddress(Address(base, offset),
4821                                      Address(base, offset + sizeof(int32_t)),
4822                                      v,
4823                                      instruction);
4824         maybe_record_implicit_null_check_done = true;
4825       } else {
4826         __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>());
4827       }
4828       break;
4829     }
4830 
4831     case DataType::Type::kUint32:
4832     case DataType::Type::kUint64:
4833     case DataType::Type::kVoid:
4834       LOG(FATAL) << "Unreachable type " << field_type;
4835       UNREACHABLE();
4836   }
4837 
4838   if (!maybe_record_implicit_null_check_done) {
4839     codegen_->MaybeRecordImplicitNullCheck(instruction);
4840   }
4841 
4842   if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
4843     CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
4844     CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
4845     codegen_->MarkGCCard(temp, card, base, value.AsRegister<CpuRegister>(), value_can_be_null);
4846   }
4847 
4848   if (is_volatile) {
4849     codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
4850   }
4851 }
4852 
4853 void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
4854   HandleFieldSet(instruction, instruction->GetFieldInfo());
4855 }
4856 
4857 void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
4858   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
4859 }
4860 
4861 void LocationsBuilderX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
4862   HandleFieldGet(instruction);
4863 }
4864 
4865 void InstructionCodeGeneratorX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
4866   HandleFieldGet(instruction, instruction->GetFieldInfo());
4867 }
4868 
4869 void LocationsBuilderX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
4870   HandleFieldGet(instruction);
4871 }
4872 
4873 void InstructionCodeGeneratorX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
4874   HandleFieldGet(instruction, instruction->GetFieldInfo());
4875 }
4876 
4877 void LocationsBuilderX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
4878   HandleFieldSet(instruction, instruction->GetFieldInfo());
4879 }
4880 
4881 void InstructionCodeGeneratorX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
4882   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
4883 }
4884 
4885 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldGet(
4886     HUnresolvedInstanceFieldGet* instruction) {
4887   FieldAccessCallingConventionX86_64 calling_convention;
4888   codegen_->CreateUnresolvedFieldLocationSummary(
4889       instruction, instruction->GetFieldType(), calling_convention);
4890 }
4891 
4892 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldGet(
4893     HUnresolvedInstanceFieldGet* instruction) {
4894   FieldAccessCallingConventionX86_64 calling_convention;
4895   codegen_->GenerateUnresolvedFieldAccess(instruction,
4896                                           instruction->GetFieldType(),
4897                                           instruction->GetFieldIndex(),
4898                                           instruction->GetDexPc(),
4899                                           calling_convention);
4900 }
4901 
4902 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldSet(
4903     HUnresolvedInstanceFieldSet* instruction) {
4904   FieldAccessCallingConventionX86_64 calling_convention;
4905   codegen_->CreateUnresolvedFieldLocationSummary(
4906       instruction, instruction->GetFieldType(), calling_convention);
4907 }
4908 
4909 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldSet(
4910     HUnresolvedInstanceFieldSet* instruction) {
4911   FieldAccessCallingConventionX86_64 calling_convention;
4912   codegen_->GenerateUnresolvedFieldAccess(instruction,
4913                                           instruction->GetFieldType(),
4914                                           instruction->GetFieldIndex(),
4915                                           instruction->GetDexPc(),
4916                                           calling_convention);
4917 }
4918 
4919 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldGet(
4920     HUnresolvedStaticFieldGet* instruction) {
4921   FieldAccessCallingConventionX86_64 calling_convention;
4922   codegen_->CreateUnresolvedFieldLocationSummary(
4923       instruction, instruction->GetFieldType(), calling_convention);
4924 }
4925 
4926 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldGet(
4927     HUnresolvedStaticFieldGet* instruction) {
4928   FieldAccessCallingConventionX86_64 calling_convention;
4929   codegen_->GenerateUnresolvedFieldAccess(instruction,
4930                                           instruction->GetFieldType(),
4931                                           instruction->GetFieldIndex(),
4932                                           instruction->GetDexPc(),
4933                                           calling_convention);
4934 }
4935 
4936 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldSet(
4937     HUnresolvedStaticFieldSet* instruction) {
4938   FieldAccessCallingConventionX86_64 calling_convention;
4939   codegen_->CreateUnresolvedFieldLocationSummary(
4940       instruction, instruction->GetFieldType(), calling_convention);
4941 }
4942 
4943 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldSet(
4944     HUnresolvedStaticFieldSet* instruction) {
4945   FieldAccessCallingConventionX86_64 calling_convention;
4946   codegen_->GenerateUnresolvedFieldAccess(instruction,
4947                                           instruction->GetFieldType(),
4948                                           instruction->GetFieldIndex(),
4949                                           instruction->GetDexPc(),
4950                                           calling_convention);
4951 }
4952 
4953 void LocationsBuilderX86_64::VisitNullCheck(HNullCheck* instruction) {
4954   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
4955   Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks()
4956       ? Location::RequiresRegister()
4957       : Location::Any();
4958   locations->SetInAt(0, loc);
4959 }
4960 
4961 void CodeGeneratorX86_64::GenerateImplicitNullCheck(HNullCheck* instruction) {
4962   if (CanMoveNullCheckToUser(instruction)) {
4963     return;
4964   }
4965   LocationSummary* locations = instruction->GetLocations();
4966   Location obj = locations->InAt(0);
4967 
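  // A load from offset 0 faults if the reference is null; testl only reads its operands, so no
  // register is clobbered. The fault handler in the runtime turns the fault into an NPE.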
4968   __ testl(CpuRegister(RAX), Address(obj.AsRegister<CpuRegister>(), 0));
4969   RecordPcInfo(instruction, instruction->GetDexPc());
4970 }
4971 
4972 void CodeGeneratorX86_64::GenerateExplicitNullCheck(HNullCheck* instruction) {
4973   SlowPathCode* slow_path = new (GetScopedAllocator()) NullCheckSlowPathX86_64(instruction);
4974   AddSlowPath(slow_path);
4975 
4976   LocationSummary* locations = instruction->GetLocations();
4977   Location obj = locations->InAt(0);
4978 
4979   if (obj.IsRegister()) {
4980     __ testl(obj.AsRegister<CpuRegister>(), obj.AsRegister<CpuRegister>());
4981   } else if (obj.IsStackSlot()) {
4982     __ cmpl(Address(CpuRegister(RSP), obj.GetStackIndex()), Immediate(0));
4983   } else {
4984     DCHECK(obj.IsConstant()) << obj;
4985     DCHECK(obj.GetConstant()->IsNullConstant());
4986     __ jmp(slow_path->GetEntryLabel());
4987     return;
4988   }
4989   __ j(kEqual, slow_path->GetEntryLabel());
4990 }
4991 
4992 void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) {
4993   codegen_->GenerateNullCheck(instruction);
4994 }
4995 
4996 void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) {
4997   bool object_array_get_with_read_barrier =
4998       kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
4999   LocationSummary* locations =
5000       new (GetGraph()->GetAllocator()) LocationSummary(instruction,
5001                                                        object_array_get_with_read_barrier
5002                                                            ? LocationSummary::kCallOnSlowPath
5003                                                            : LocationSummary::kNoCall);
5004   if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
5005     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
5006   }
5007   locations->SetInAt(0, Location::RequiresRegister());
5008   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5009   if (DataType::IsFloatingPointType(instruction->GetType())) {
5010     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5011   } else {
5012     // The output overlaps for an object array get when read barriers
5013     // are enabled: we do not want the move to overwrite the array's
5014     // location, as we need it to emit the read barrier.
5015     locations->SetOut(
5016         Location::RequiresRegister(),
5017         object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
5018   }
5019 }
5020 
5021 void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
5022   LocationSummary* locations = instruction->GetLocations();
5023   Location obj_loc = locations->InAt(0);
5024   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
5025   Location index = locations->InAt(1);
5026   Location out_loc = locations->Out();
5027   uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
5028 
5029   DataType::Type type = instruction->GetType();
5030   switch (type) {
5031     case DataType::Type::kBool:
5032     case DataType::Type::kUint8: {
5033       CpuRegister out = out_loc.AsRegister<CpuRegister>();
5034       __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
5035       break;
5036     }
5037 
5038     case DataType::Type::kInt8: {
5039       CpuRegister out = out_loc.AsRegister<CpuRegister>();
5040       __ movsxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
5041       break;
5042     }
5043 
5044     case DataType::Type::kUint16: {
5045       CpuRegister out = out_loc.AsRegister<CpuRegister>();
5046       if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
5047         // Branch into the compressed and uncompressed cases based on the string's compression flag.
5048         uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
5049         NearLabel done, not_compressed;
5050         __ testb(Address(obj, count_offset), Immediate(1));
5051         codegen_->MaybeRecordImplicitNullCheck(instruction);
5052         static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
5053                       "Expecting 0=compressed, 1=uncompressed");
5054         __ j(kNotZero, &not_compressed);
5055         __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
5056         __ jmp(&done);
5057         __ Bind(&not_compressed);
5058         __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
5059         __ Bind(&done);
5060       } else {
5061         __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
5062       }
5063       break;
5064     }
5065 
5066     case DataType::Type::kInt16: {
5067       CpuRegister out = out_loc.AsRegister<CpuRegister>();
5068       __ movsxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
5069       break;
5070     }
5071 
5072     case DataType::Type::kInt32: {
5073       CpuRegister out = out_loc.AsRegister<CpuRegister>();
5074       __ movl(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
5075       break;
5076     }
5077 
5078     case DataType::Type::kReference: {
5079       static_assert(
5080           sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
5081           "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
5082       // /* HeapReference<Object> */ out =
5083       //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
5084       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
5085         // Note that a potential implicit null check is handled in this
5086         // CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call.
5087         codegen_->GenerateArrayLoadWithBakerReadBarrier(
5088             instruction, out_loc, obj, data_offset, index, /* needs_null_check= */ true);
5089       } else {
5090         CpuRegister out = out_loc.AsRegister<CpuRegister>();
5091         __ movl(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
5092         codegen_->MaybeRecordImplicitNullCheck(instruction);
5093         // If read barriers are enabled, emit read barriers other than
5094         // Baker's using a slow path (and also unpoison the loaded
5095         // reference, if heap poisoning is enabled).
5096         if (index.IsConstant()) {
5097           uint32_t offset =
5098               (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
5099           codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
5100         } else {
5101           codegen_->MaybeGenerateReadBarrierSlow(
5102               instruction, out_loc, out_loc, obj_loc, data_offset, index);
5103         }
5104       }
5105       break;
5106     }
5107 
5108     case DataType::Type::kInt64: {
5109       CpuRegister out = out_loc.AsRegister<CpuRegister>();
5110       __ movq(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_8, data_offset));
5111       break;
5112     }
5113 
5114     case DataType::Type::kFloat32: {
5115       XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
5116       __ movss(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
5117       break;
5118     }
5119 
5120     case DataType::Type::kFloat64: {
5121       XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
5122       __ movsd(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_8, data_offset));
5123       break;
5124     }
5125 
5126     case DataType::Type::kUint32:
5127     case DataType::Type::kUint64:
5128     case DataType::Type::kVoid:
5129       LOG(FATAL) << "Unreachable type " << type;
5130       UNREACHABLE();
5131   }
5132 
5133   if (type == DataType::Type::kReference) {
5134     // Potential implicit null checks, in the case of reference
5135     // arrays, are handled in the previous switch statement.
5136   } else {
5137     codegen_->MaybeRecordImplicitNullCheck(instruction);
5138   }
5139 }
5140 
5141 void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) {
5142   DataType::Type value_type = instruction->GetComponentType();
5143 
5144   bool needs_write_barrier =
5145       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
5146   bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
5147 
5148   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5149       instruction,
5150       may_need_runtime_call_for_type_check ?
5151           LocationSummary::kCallOnSlowPath :
5152           LocationSummary::kNoCall);
5153 
5154   locations->SetInAt(0, Location::RequiresRegister());
5155   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5156   if (DataType::IsFloatingPointType(value_type)) {
5157     locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
5158   } else {
5159     locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
5160   }
5161 
5162   if (needs_write_barrier) {
5163     // Temporary registers for the write barrier.
5164     locations->AddTemp(Location::RequiresRegister());  // Possibly used for ref. poisoning too.
5165     locations->AddTemp(Location::RequiresRegister());
5166   }
5167 }
5168 
5169 void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
5170   LocationSummary* locations = instruction->GetLocations();
5171   Location array_loc = locations->InAt(0);
5172   CpuRegister array = array_loc.AsRegister<CpuRegister>();
5173   Location index = locations->InAt(1);
5174   Location value = locations->InAt(2);
5175   DataType::Type value_type = instruction->GetComponentType();
5176   bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
5177   bool needs_write_barrier =
5178       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
5179   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
5180   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
5181   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
5182 
5183   switch (value_type) {
5184     case DataType::Type::kBool:
5185     case DataType::Type::kUint8:
5186     case DataType::Type::kInt8: {
5187       uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
5188       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_1, offset);
5189       if (value.IsRegister()) {
5190         __ movb(address, value.AsRegister<CpuRegister>());
5191       } else {
5192         __ movb(address, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
5193       }
5194       codegen_->MaybeRecordImplicitNullCheck(instruction);
5195       break;
5196     }
5197 
5198     case DataType::Type::kUint16:
5199     case DataType::Type::kInt16: {
5200       uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
5201       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_2, offset);
5202       if (value.IsRegister()) {
5203         __ movw(address, value.AsRegister<CpuRegister>());
5204       } else {
5205         DCHECK(value.IsConstant()) << value;
5206         __ movw(address, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
5207       }
5208       codegen_->MaybeRecordImplicitNullCheck(instruction);
5209       break;
5210     }
5211 
5212     case DataType::Type::kReference: {
5213       uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
5214       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
5215 
5216       if (!value.IsRegister()) {
5217         // Just setting null.
5218         DCHECK(instruction->InputAt(2)->IsNullConstant());
5219         DCHECK(value.IsConstant()) << value;
5220         __ movl(address, Immediate(0));
5221         codegen_->MaybeRecordImplicitNullCheck(instruction);
5222         DCHECK(!needs_write_barrier);
5223         DCHECK(!may_need_runtime_call_for_type_check);
5224         break;
5225       }
5226 
5227       DCHECK(needs_write_barrier);
5228       CpuRegister register_value = value.AsRegister<CpuRegister>();
5229       // We cannot use a NearLabel for `done`, as its range may be too
5230       // short when Baker read barriers are enabled.
5231       Label done;
5232       NearLabel not_null, do_put;
5233       SlowPathCode* slow_path = nullptr;
5234       Location temp_loc = locations->GetTemp(0);
5235       CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
5236       if (may_need_runtime_call_for_type_check) {
5237         slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathX86_64(instruction);
5238         codegen_->AddSlowPath(slow_path);
5239         if (instruction->GetValueCanBeNull()) {
5240           __ testl(register_value, register_value);
5241           __ j(kNotEqual, &not_null);
5242           __ movl(address, Immediate(0));
5243           codegen_->MaybeRecordImplicitNullCheck(instruction);
5244           __ jmp(&done);
5245           __ Bind(&not_null);
5246         }
5247 
5248         // Note that when Baker read barriers are enabled, the type
5249         // checks are performed without read barriers.  This is fine,
5250         // even in the case where a class object is in the from-space
5251         // after the flip, as a comparison involving such a type would
5252         // not produce a false positive; it may of course produce a
5253         // false negative, in which case we would take the ArraySet
5254         // slow path.
5255 
5256         // /* HeapReference<Class> */ temp = array->klass_
5257         __ movl(temp, Address(array, class_offset));
5258         codegen_->MaybeRecordImplicitNullCheck(instruction);
5259         __ MaybeUnpoisonHeapReference(temp);
5260 
5261         // /* HeapReference<Class> */ temp = temp->component_type_
5262         __ movl(temp, Address(temp, component_offset));
5263         // If heap poisoning is enabled, no need to unpoison `temp`
5264         // nor the object reference in `register_value->klass`, as
5265         // we are comparing two poisoned references.
5266         __ cmpl(temp, Address(register_value, class_offset));
5267 
5268         if (instruction->StaticTypeOfArrayIsObjectArray()) {
5269           __ j(kEqual, &do_put);
5270           // If heap poisoning is enabled, the `temp` reference has
5271           // not been unpoisoned yet; unpoison it now.
5272           __ MaybeUnpoisonHeapReference(temp);
5273 
5274           // If heap poisoning is enabled, no need to unpoison the
5275           // heap reference loaded below, as it is only used for a
5276           // comparison with null.
5277           __ cmpl(Address(temp, super_offset), Immediate(0));
5278           __ j(kNotEqual, slow_path->GetEntryLabel());
5279           __ Bind(&do_put);
5280         } else {
5281           __ j(kNotEqual, slow_path->GetEntryLabel());
5282         }
5283       }
5284 
5285       if (kPoisonHeapReferences) {
5286         __ movl(temp, register_value);
5287         __ PoisonHeapReference(temp);
5288         __ movl(address, temp);
5289       } else {
5290         __ movl(address, register_value);
5291       }
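           // If a type check was emitted above, an implicit null check has already been
           // recorded there, so it is not recorded again for this store.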
5292       if (!may_need_runtime_call_for_type_check) {
5293         codegen_->MaybeRecordImplicitNullCheck(instruction);
5294       }
5295 
5296       CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
5297       codegen_->MarkGCCard(
5298           temp, card, array, value.AsRegister<CpuRegister>(), instruction->GetValueCanBeNull());
5299       __ Bind(&done);
5300 
5301       if (slow_path != nullptr) {
5302         __ Bind(slow_path->GetExitLabel());
5303       }
5304 
5305       break;
5306     }
5307 
5308     case DataType::Type::kInt32: {
5309       uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
5310       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
5311       if (value.IsRegister()) {
5312         __ movl(address, value.AsRegister<CpuRegister>());
5313       } else {
5314         DCHECK(value.IsConstant()) << value;
5315         int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
5316         __ movl(address, Immediate(v));
5317       }
5318       codegen_->MaybeRecordImplicitNullCheck(instruction);
5319       break;
5320     }
5321 
5322     case DataType::Type::kInt64: {
5323       uint32_t offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
5324       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset);
5325       if (value.IsRegister()) {
5326         __ movq(address, value.AsRegister<CpuRegister>());
5327         codegen_->MaybeRecordImplicitNullCheck(instruction);
5328       } else {
5329         int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
5330         Address address_high =
5331             CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t));
5332         codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
5333       }
5334       break;
5335     }
5336 
5337     case DataType::Type::kFloat32: {
5338       uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
5339       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
5340       if (value.IsFpuRegister()) {
5341         __ movss(address, value.AsFpuRegister<XmmRegister>());
5342       } else {
5343         DCHECK(value.IsConstant());
5344         int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
5345         __ movl(address, Immediate(v));
5346       }
5347       codegen_->MaybeRecordImplicitNullCheck(instruction);
5348       break;
5349     }
5350 
5351     case DataType::Type::kFloat64: {
5352       uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
5353       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset);
5354       if (value.IsFpuRegister()) {
5355         __ movsd(address, value.AsFpuRegister<XmmRegister>());
5356         codegen_->MaybeRecordImplicitNullCheck(instruction);
5357       } else {
5358         int64_t v =
5359             bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
5360         Address address_high =
5361             CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t));
5362         codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
5363       }
5364       break;
5365     }
5366 
5367     case DataType::Type::kUint32:
5368     case DataType::Type::kUint64:
5369     case DataType::Type::kVoid:
5370       LOG(FATAL) << "Unreachable type " << instruction->GetType();
5371       UNREACHABLE();
5372   }
5373 }
5374 
5375 void LocationsBuilderX86_64::VisitArrayLength(HArrayLength* instruction) {
5376   LocationSummary* locations =
5377       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5378   locations->SetInAt(0, Location::RequiresRegister());
5379   if (!instruction->IsEmittedAtUseSite()) {
5380     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5381   }
5382 }
5383 
5384 void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction) {
5385   if (instruction->IsEmittedAtUseSite()) {
5386     return;
5387   }
5388 
5389   LocationSummary* locations = instruction->GetLocations();
5390   uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
5391   CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
5392   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
5393   __ movl(out, Address(obj, offset));
5394   codegen_->MaybeRecordImplicitNullCheck(instruction);
5395   // Shift out the string compression flag bit in case this length is actually a String's length.
5396   if (mirror::kUseStringCompression && instruction->IsStringLength()) {
5397     __ shrl(out, Immediate(1));
5398   }
5399 }
5400 
5401 void LocationsBuilderX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
5402   RegisterSet caller_saves = RegisterSet::Empty();
5403   InvokeRuntimeCallingConvention calling_convention;
5404   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
5405   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
5406   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
5407   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
5408   HInstruction* length = instruction->InputAt(1);
5409   if (!length->IsEmittedAtUseSite()) {
5410     locations->SetInAt(1, Location::RegisterOrConstant(length));
5411   }
5412 }
5413 
5414 void InstructionCodeGeneratorX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
5415   LocationSummary* locations = instruction->GetLocations();
5416   Location index_loc = locations->InAt(0);
5417   Location length_loc = locations->InAt(1);
5418   SlowPathCode* slow_path =
5419       new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathX86_64(instruction);
5420 
5421   if (length_loc.IsConstant()) {
5422     int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
5423     if (index_loc.IsConstant()) {
5424       // BCE will remove the bounds check if we are guaranteed to pass.
5425       int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
5426       if (index < 0 || index >= length) {
5427         codegen_->AddSlowPath(slow_path);
5428         __ jmp(slow_path->GetEntryLabel());
5429       } else {
5430         // Some optimization after BCE may have generated this, and we should not
5431         // generate a bounds check if it is a valid range.
5432       }
5433       return;
5434     }
5435 
5436     // We have to reverse the jump condition because the length is the constant.
5437     CpuRegister index_reg = index_loc.AsRegister<CpuRegister>();
5438     __ cmpl(index_reg, Immediate(length));
5439     codegen_->AddSlowPath(slow_path);
5440     __ j(kAboveEqual, slow_path->GetEntryLabel());
5441   } else {
5442     HInstruction* array_length = instruction->InputAt(1);
5443     if (array_length->IsEmittedAtUseSite()) {
5444       // Address the length field in the array.
5445       DCHECK(array_length->IsArrayLength());
5446       uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
5447       Location array_loc = array_length->GetLocations()->InAt(0);
5448       Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
5449       if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
5450         // TODO: if index_loc.IsConstant(), compare twice the index (to compensate for
5451         // the string compression flag) with the in-memory length and avoid the temporary.
5452         CpuRegister length_reg = CpuRegister(TMP);
5453         __ movl(length_reg, array_len);
5454         codegen_->MaybeRecordImplicitNullCheck(array_length);
5455         __ shrl(length_reg, Immediate(1));
5456         codegen_->GenerateIntCompare(length_reg, index_loc);
5457       } else {
5458         // Checking the bound for the general case:
5459         // an array of char, or a String's char array with the compression feature off.
5460         if (index_loc.IsConstant()) {
5461           int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
5462           __ cmpl(array_len, Immediate(value));
5463         } else {
5464           __ cmpl(array_len, index_loc.AsRegister<CpuRegister>());
5465         }
5466         codegen_->MaybeRecordImplicitNullCheck(array_length);
5467       }
5468     } else {
5469       codegen_->GenerateIntCompare(length_loc, index_loc);
5470     }
5471     codegen_->AddSlowPath(slow_path);
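         // The comparison above computed `length - index`; take the slow path when
         // length <= index (unsigned), which also catches a negative index.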
5472     __ j(kBelowEqual, slow_path->GetEntryLabel());
5473   }
5474 }
5475 
5476 void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp,
5477                                      CpuRegister card,
5478                                      CpuRegister object,
5479                                      CpuRegister value,
5480                                      bool value_can_be_null) {
5481   NearLabel is_null;
5482   if (value_can_be_null) {
5483     __ testl(value, value);
5484     __ j(kEqual, &is_null);
5485   }
5486   // Load the address of the card table into `card`.
5487   __ gs()->movq(card, Address::Absolute(Thread::CardTableOffset<kX86_64PointerSize>().Int32Value(),
5488                                         /* no_rip= */ true));
5489   // Calculate the offset (in the card table) of the card corresponding to
5490   // `object`.
5491   __ movq(temp, object);
5492   __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift));
5493   // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
5494   // `object`'s card.
5495   //
5496   // Register `card` contains the address of the card table. Note that the card
5497   // table's base is biased during its creation so that it always starts at an
5498   // address whose least-significant byte is equal to `kCardDirty` (see
5499   // art::gc::accounting::CardTable::Create). Therefore the MOVB instruction
5500   // below writes the `kCardDirty` (byte) value into the `object`'s card
5501   // (located at `card + object >> kCardShift`).
5502   //
5503   // This dual use of the value in register `card` (1. to calculate the location
5504   // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
5505   // (no need to explicitly load `kCardDirty` as an immediate value).
5506   __ movb(Address(temp, card, TIMES_1, 0), card);
5507   if (value_can_be_null) {
5508     __ Bind(&is_null);
5509   }
5510 }
5511 
5512 void LocationsBuilderX86_64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
5513   LOG(FATAL) << "Unimplemented";
5514 }
5515 
5516 void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instruction) {
5517   if (instruction->GetNext()->IsSuspendCheck() &&
5518       instruction->GetBlock()->GetLoopInformation() != nullptr) {
5519     HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
5520     // The back edge will generate the suspend check.
5521     codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
5522   }
5523 
5524   codegen_->GetMoveResolver()->EmitNativeCode(instruction);
5525 }
5526 
5527 void LocationsBuilderX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
5528   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5529       instruction, LocationSummary::kCallOnSlowPath);
5530   // In suspend check slow path, usually there are no caller-save registers at all.
5531   // If SIMD instructions are present, however, we force spilling all live SIMD
5532   // registers in full width (since the runtime only saves/restores lower part).
5533   locations->SetCustomSlowPathCallerSaves(
5534       GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
5535 }
5536 
5537 void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
5538   HBasicBlock* block = instruction->GetBlock();
5539   if (block->GetLoopInformation() != nullptr) {
5540     DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
5541     // The back edge will generate the suspend check.
5542     return;
5543   }
5544   if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
5545     // The goto will generate the suspend check.
5546     return;
5547   }
5548   GenerateSuspendCheck(instruction, nullptr);
5549 }
5550 
5551 void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruction,
5552                                                           HBasicBlock* successor) {
5553   SuspendCheckSlowPathX86_64* slow_path =
5554       down_cast<SuspendCheckSlowPathX86_64*>(instruction->GetSlowPath());
5555   if (slow_path == nullptr) {
5556     slow_path =
5557         new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathX86_64(instruction, successor);
5558     instruction->SetSlowPath(slow_path);
5559     codegen_->AddSlowPath(slow_path);
5560     if (successor != nullptr) {
5561       DCHECK(successor->IsLoopHeader());
5562     }
5563   } else {
5564     DCHECK_EQ(slow_path->GetSuccessor(), successor);
5565   }
5566 
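       // Any non-zero thread flag (e.g. a pending suspend or checkpoint request)
       // routes execution to the suspend check slow path.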
5567   __ gs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64PointerSize>().Int32Value(),
5568                                   /* no_rip= */ true),
5569                 Immediate(0));
5570   if (successor == nullptr) {
5571     __ j(kNotEqual, slow_path->GetEntryLabel());
5572     __ Bind(slow_path->GetReturnLabel());
5573   } else {
5574     __ j(kEqual, codegen_->GetLabelOf(successor));
5575     __ jmp(slow_path->GetEntryLabel());
5576   }
5577 }
5578 
5579 X86_64Assembler* ParallelMoveResolverX86_64::GetAssembler() const {
5580   return codegen_->GetAssembler();
5581 }
5582 
5583 void ParallelMoveResolverX86_64::EmitMove(size_t index) {
5584   MoveOperands* move = moves_[index];
5585   Location source = move->GetSource();
5586   Location destination = move->GetDestination();
5587 
5588   if (source.IsRegister()) {
5589     if (destination.IsRegister()) {
5590       __ movq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
5591     } else if (destination.IsStackSlot()) {
5592       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
5593               source.AsRegister<CpuRegister>());
5594     } else {
5595       DCHECK(destination.IsDoubleStackSlot());
5596       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
5597               source.AsRegister<CpuRegister>());
5598     }
5599   } else if (source.IsStackSlot()) {
5600     if (destination.IsRegister()) {
5601       __ movl(destination.AsRegister<CpuRegister>(),
5602               Address(CpuRegister(RSP), source.GetStackIndex()));
5603     } else if (destination.IsFpuRegister()) {
5604       __ movss(destination.AsFpuRegister<XmmRegister>(),
5605               Address(CpuRegister(RSP), source.GetStackIndex()));
5606     } else {
5607       DCHECK(destination.IsStackSlot());
5608       __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
5609       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
5610     }
5611   } else if (source.IsDoubleStackSlot()) {
5612     if (destination.IsRegister()) {
5613       __ movq(destination.AsRegister<CpuRegister>(),
5614               Address(CpuRegister(RSP), source.GetStackIndex()));
5615     } else if (destination.IsFpuRegister()) {
5616       __ movsd(destination.AsFpuRegister<XmmRegister>(),
5617                Address(CpuRegister(RSP), source.GetStackIndex()));
5618     } else {
5619       DCHECK(destination.IsDoubleStackSlot()) << destination;
5620       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
5621       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
5622     }
5623   } else if (source.IsSIMDStackSlot()) {
5624     if (destination.IsFpuRegister()) {
5625       __ movups(destination.AsFpuRegister<XmmRegister>(),
5626                 Address(CpuRegister(RSP), source.GetStackIndex()));
5627     } else {
5628       DCHECK(destination.IsSIMDStackSlot());
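           // Copy the 128-bit value in two 64-bit halves through the TMP register.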
5629       size_t high = kX86_64WordSize;
5630       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
5631       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
5632       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex() + high));
5633       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex() + high), CpuRegister(TMP));
5634     }
5635   } else if (source.IsConstant()) {
5636     HConstant* constant = source.GetConstant();
5637     if (constant->IsIntConstant() || constant->IsNullConstant()) {
5638       int32_t value = CodeGenerator::GetInt32ValueOf(constant);
5639       if (destination.IsRegister()) {
5640         if (value == 0) {
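               // `xorl reg, reg` clears the register with a shorter encoding than a
               // `movl` of immediate zero.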
5641           __ xorl(destination.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
5642         } else {
5643           __ movl(destination.AsRegister<CpuRegister>(), Immediate(value));
5644         }
5645       } else {
5646         DCHECK(destination.IsStackSlot()) << destination;
5647         __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
5648       }
5649     } else if (constant->IsLongConstant()) {
5650       int64_t value = constant->AsLongConstant()->GetValue();
5651       if (destination.IsRegister()) {
5652         codegen_->Load64BitValue(destination.AsRegister<CpuRegister>(), value);
5653       } else {
5654         DCHECK(destination.IsDoubleStackSlot()) << destination;
5655         codegen_->Store64BitValueToStack(destination, value);
5656       }
5657     } else if (constant->IsFloatConstant()) {
5658       float fp_value = constant->AsFloatConstant()->GetValue();
5659       if (destination.IsFpuRegister()) {
5660         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
5661         codegen_->Load32BitValue(dest, fp_value);
5662       } else {
5663         DCHECK(destination.IsStackSlot()) << destination;
5664         Immediate imm(bit_cast<int32_t, float>(fp_value));
5665         __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
5666       }
5667     } else {
5668       DCHECK(constant->IsDoubleConstant()) << constant->DebugName();
5669       double fp_value = constant->AsDoubleConstant()->GetValue();
5670       int64_t value = bit_cast<int64_t, double>(fp_value);
5671       if (destination.IsFpuRegister()) {
5672         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
5673         codegen_->Load64BitValue(dest, fp_value);
5674       } else {
5675         DCHECK(destination.IsDoubleStackSlot()) << destination;
5676         codegen_->Store64BitValueToStack(destination, value);
5677       }
5678     }
5679   } else if (source.IsFpuRegister()) {
5680     if (destination.IsFpuRegister()) {
5681       __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
5682     } else if (destination.IsStackSlot()) {
5683       __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
5684                source.AsFpuRegister<XmmRegister>());
5685     } else if (destination.IsDoubleStackSlot()) {
5686       __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
5687                source.AsFpuRegister<XmmRegister>());
5688     } else {
5689       DCHECK(destination.IsSIMDStackSlot());
5690       __ movups(Address(CpuRegister(RSP), destination.GetStackIndex()),
5691                 source.AsFpuRegister<XmmRegister>());
5692     }
5693   }
5694 }
5695 
5696 void ParallelMoveResolverX86_64::Exchange32(CpuRegister reg, int mem) {
5697   __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5698   __ movl(Address(CpuRegister(RSP), mem), reg);
5699   __ movl(reg, CpuRegister(TMP));
5700 }
5701 
5702 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg1, CpuRegister reg2) {
5703   __ movq(CpuRegister(TMP), reg1);
5704   __ movq(reg1, reg2);
5705   __ movq(reg2, CpuRegister(TMP));
5706 }
5707 
5708 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) {
5709   __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5710   __ movq(Address(CpuRegister(RSP), mem), reg);
5711   __ movq(reg, CpuRegister(TMP));
5712 }
5713 
5714 void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) {
5715   __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5716   __ movss(Address(CpuRegister(RSP), mem), reg);
5717   __ movd(reg, CpuRegister(TMP));
5718 }
5719 
5720 void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) {
5721   __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5722   __ movsd(Address(CpuRegister(RSP), mem), reg);
5723   __ movd(reg, CpuRegister(TMP));
5724 }
5725 
5726 void ParallelMoveResolverX86_64::Exchange128(XmmRegister reg, int mem) {
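       // Spill `reg` to a fresh 16-byte scratch area below RSP, swap that area with
       // the 128-bit stack slot, then reload `reg` and release the scratch area.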
5727   size_t extra_slot = 2 * kX86_64WordSize;
5728   __ subq(CpuRegister(RSP), Immediate(extra_slot));
5729   __ movups(Address(CpuRegister(RSP), 0), XmmRegister(reg));
5730   ExchangeMemory64(0, mem + extra_slot, 2);
5731   __ movups(XmmRegister(reg), Address(CpuRegister(RSP), 0));
5732   __ addq(CpuRegister(RSP), Immediate(extra_slot));
5733 }
5734 
5735 void ParallelMoveResolverX86_64::ExchangeMemory32(int mem1, int mem2) {
5736   ScratchRegisterScope ensure_scratch(
5737       this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
5738 
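       // If the scratch register had to be spilled, it was pushed on the stack, so
       // all RSP-relative offsets below must be adjusted by one word.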
5739   int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
5740   __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
5741   __ movl(CpuRegister(ensure_scratch.GetRegister()),
5742           Address(CpuRegister(RSP), mem2 + stack_offset));
5743   __ movl(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
5744   __ movl(Address(CpuRegister(RSP), mem1 + stack_offset),
5745           CpuRegister(ensure_scratch.GetRegister()));
5746 }
5747 
5748 void ParallelMoveResolverX86_64::ExchangeMemory64(int mem1, int mem2, int num_of_qwords) {
5749   ScratchRegisterScope ensure_scratch(
5750       this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
5751 
5752   int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
5753 
5754   // Now that temp registers are available (possibly spilled), exchange blocks of memory.
5755   for (int i = 0; i < num_of_qwords; i++) {
5756     __ movq(CpuRegister(TMP),
5757             Address(CpuRegister(RSP), mem1 + stack_offset));
5758     __ movq(CpuRegister(ensure_scratch.GetRegister()),
5759             Address(CpuRegister(RSP), mem2 + stack_offset));
5760     __ movq(Address(CpuRegister(RSP), mem2 + stack_offset),
5761             CpuRegister(TMP));
5762     __ movq(Address(CpuRegister(RSP), mem1 + stack_offset),
5763             CpuRegister(ensure_scratch.GetRegister()));
5764     stack_offset += kX86_64WordSize;
5765   }
5766 }
5767 
5768 void ParallelMoveResolverX86_64::EmitSwap(size_t index) {
5769   MoveOperands* move = moves_[index];
5770   Location source = move->GetSource();
5771   Location destination = move->GetDestination();
5772 
5773   if (source.IsRegister() && destination.IsRegister()) {
5774     Exchange64(source.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
5775   } else if (source.IsRegister() && destination.IsStackSlot()) {
5776     Exchange32(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
5777   } else if (source.IsStackSlot() && destination.IsRegister()) {
5778     Exchange32(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
5779   } else if (source.IsStackSlot() && destination.IsStackSlot()) {
5780     ExchangeMemory32(destination.GetStackIndex(), source.GetStackIndex());
5781   } else if (source.IsRegister() && destination.IsDoubleStackSlot()) {
5782     Exchange64(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
5783   } else if (source.IsDoubleStackSlot() && destination.IsRegister()) {
5784     Exchange64(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
5785   } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
5786     ExchangeMemory64(destination.GetStackIndex(), source.GetStackIndex(), 1);
5787   } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
5788     __ movd(CpuRegister(TMP), source.AsFpuRegister<XmmRegister>());
5789     __ movaps(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>());
5790     __ movd(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP));
5791   } else if (source.IsFpuRegister() && destination.IsStackSlot()) {
5792     Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
5793   } else if (source.IsStackSlot() && destination.IsFpuRegister()) {
5794     Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
5795   } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) {
5796     Exchange64(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
5797   } else if (source.IsDoubleStackSlot() && destination.IsFpuRegister()) {
5798     Exchange64(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
5799   } else if (source.IsSIMDStackSlot() && destination.IsSIMDStackSlot()) {
5800     ExchangeMemory64(destination.GetStackIndex(), source.GetStackIndex(), 2);
5801   } else if (source.IsFpuRegister() && destination.IsSIMDStackSlot()) {
5802     Exchange128(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
5803   } else if (destination.IsFpuRegister() && source.IsSIMDStackSlot()) {
5804     Exchange128(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
5805   } else {
5806     LOG(FATAL) << "Unimplemented swap between " << source << " and " << destination;
5807   }
5808 }
5809 
5810 
5811 void ParallelMoveResolverX86_64::SpillScratch(int reg) {
5812   __ pushq(CpuRegister(reg));
5813 }
5814 
5815 
5816 void ParallelMoveResolverX86_64::RestoreScratch(int reg) {
5817   __ popq(CpuRegister(reg));
5818 }
5819 
5820 void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck(
5821     SlowPathCode* slow_path, CpuRegister class_reg) {
5822   constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
5823   const size_t status_byte_offset =
5824       mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
5825   constexpr uint32_t shifted_initialized_value =
5826       enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte);
5827 
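       // Classes whose status byte is below the shifted kInitialized value are not
       // yet initialized and must take the slow path.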
5828   __ cmpb(Address(class_reg, status_byte_offset), Immediate(shifted_initialized_value));
5829   __ j(kBelow, slow_path->GetEntryLabel());
5830   __ Bind(slow_path->GetExitLabel());
5831   // No need for memory fence, thanks to the x86-64 memory model.
5832 }
5833 
5834 void InstructionCodeGeneratorX86_64::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
5835                                                                        CpuRegister temp) {
5836   uint32_t path_to_root = check->GetBitstringPathToRoot();
5837   uint32_t mask = check->GetBitstringMask();
5838   DCHECK(IsPowerOfTwo(mask + 1));
5839   size_t mask_bits = WhichPowerOf2(mask + 1);
5840 
5841   if (mask_bits == 16u) {
5842     // Compare the bitstring in memory.
5843     __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root));
5844   } else {
5845     // /* uint32_t */ temp = temp->status_
5846     __ movl(temp, Address(temp, mirror::Class::StatusOffset()));
5847     // Compare the bitstring bits using SUB.
5848     __ subl(temp, Immediate(path_to_root));
5849     // Shift out bits that do not contribute to the comparison.
5850     __ shll(temp, Immediate(32u - mask_bits));
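         // The zero flag is now set iff the masked status bits equal `path_to_root`;
         // the caller branches on that flag.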
5851   }
5852 }
5853 
5854 HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind(
5855     HLoadClass::LoadKind desired_class_load_kind) {
5856   switch (desired_class_load_kind) {
5857     case HLoadClass::LoadKind::kInvalid:
5858       LOG(FATAL) << "UNREACHABLE";
5859       UNREACHABLE();
5860     case HLoadClass::LoadKind::kReferrersClass:
5861       break;
5862     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
5863     case HLoadClass::LoadKind::kBootImageRelRo:
5864     case HLoadClass::LoadKind::kBssEntry:
5865       DCHECK(!Runtime::Current()->UseJitCompilation());
5866       break;
5867     case HLoadClass::LoadKind::kJitBootImageAddress:
5868     case HLoadClass::LoadKind::kJitTableAddress:
5869       DCHECK(Runtime::Current()->UseJitCompilation());
5870       break;
5871     case HLoadClass::LoadKind::kRuntimeCall:
5872       break;
5873   }
5874   return desired_class_load_kind;
5875 }
5876 
5877 void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
5878   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
5879   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
5880     // Custom calling convention: RAX serves as both input and output.
5881     CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
5882         cls,
5883         Location::RegisterLocation(RAX),
5884         Location::RegisterLocation(RAX));
5885     return;
5886   }
5887   DCHECK(!cls->NeedsAccessCheck());
5888 
5889   const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
5890   LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
5891       ? LocationSummary::kCallOnSlowPath
5892       : LocationSummary::kNoCall;
5893   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind);
5894   if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
5895     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
5896   }
5897 
5898   if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
5899     locations->SetInAt(0, Location::RequiresRegister());
5900   }
5901   locations->SetOut(Location::RequiresRegister());
5902   if (load_kind == HLoadClass::LoadKind::kBssEntry) {
5903     if (!kUseReadBarrier || kUseBakerReadBarrier) {
5904       // Rely on the type resolution and/or initialization to save everything.
5905       locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
5906     } else {
5907       // For non-Baker read barrier we have a temp-clobbering call.
5908     }
5909   }
5910 }
5911 
5912 Label* CodeGeneratorX86_64::NewJitRootClassPatch(const DexFile& dex_file,
5913                                                  dex::TypeIndex type_index,
5914                                                  Handle<mirror::Class> handle) {
5915   ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
5916   // Add a patch entry and return the label.
5917   jit_class_patches_.emplace_back(&dex_file, type_index.index_);
5918   PatchInfo<Label>* info = &jit_class_patches_.back();
5919   return &info->label;
5920 }
5921 
5922 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
5923 // move.
5924 void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
5925   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
5926   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
5927     codegen_->GenerateLoadClassRuntimeCall(cls);
5928     return;
5929   }
5930   DCHECK(!cls->NeedsAccessCheck());
5931 
5932   LocationSummary* locations = cls->GetLocations();
5933   Location out_loc = locations->Out();
5934   CpuRegister out = out_loc.AsRegister<CpuRegister>();
5935 
5936   const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
5937       ? kWithoutReadBarrier
5938       : kCompilerReadBarrierOption;
5939   bool generate_null_check = false;
5940   switch (load_kind) {
5941     case HLoadClass::LoadKind::kReferrersClass: {
5942       DCHECK(!cls->CanCallRuntime());
5943       DCHECK(!cls->MustGenerateClinitCheck());
5944       // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
5945       CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
5946       GenerateGcRootFieldLoad(
5947           cls,
5948           out_loc,
5949           Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()),
5950           /* fixup_label= */ nullptr,
5951           read_barrier_option);
5952       break;
5953     }
5954     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
5955       DCHECK(codegen_->GetCompilerOptions().IsBootImage());
5956       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
5957       __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false));
5958       codegen_->RecordBootImageTypePatch(cls);
5959       break;
5960     case HLoadClass::LoadKind::kBootImageRelRo: {
5961       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
5962       __ movl(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false));
5963       codegen_->RecordBootImageRelRoPatch(codegen_->GetBootImageOffset(cls));
5964       break;
5965     }
5966     case HLoadClass::LoadKind::kBssEntry: {
5967       Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
5968                                           /* no_rip= */ false);
5969       Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls);
5970       // /* GcRoot<mirror::Class> */ out = *address  /* PC-relative */
5971       GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
5972       generate_null_check = true;
5973       break;
5974     }
5975     case HLoadClass::LoadKind::kJitBootImageAddress: {
5976       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
5977       uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get());
5978       DCHECK_NE(address, 0u);
5979       __ movl(out, Immediate(static_cast<int32_t>(address)));  // Zero-extended.
5980       break;
5981     }
5982     case HLoadClass::LoadKind::kJitTableAddress: {
5983       Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
5984                                           /* no_rip= */ true);
5985       Label* fixup_label =
5986           codegen_->NewJitRootClassPatch(cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass());
5987       // /* GcRoot<mirror::Class> */ out = *address
5988       GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
5989       break;
5990     }
5991     default:
5992       LOG(FATAL) << "Unexpected load kind: " << cls->GetLoadKind();
5993       UNREACHABLE();
5994   }
5995 
5996   if (generate_null_check || cls->MustGenerateClinitCheck()) {
5997     DCHECK(cls->CanCallRuntime());
5998     SlowPathCode* slow_path =
5999         new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64(cls, cls);
6000     codegen_->AddSlowPath(slow_path);
6001     if (generate_null_check) {
6002       __ testl(out, out);
6003       __ j(kEqual, slow_path->GetEntryLabel());
6004     }
6005     if (cls->MustGenerateClinitCheck()) {
6006       GenerateClassInitializationCheck(slow_path, out);
6007     } else {
6008       __ Bind(slow_path->GetExitLabel());
6009     }
6010   }
6011 }
6012 
6013 void LocationsBuilderX86_64::VisitClinitCheck(HClinitCheck* check) {
6014   LocationSummary* locations =
6015       new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
6016   locations->SetInAt(0, Location::RequiresRegister());
6017   if (check->HasUses()) {
6018     locations->SetOut(Location::SameAsFirstInput());
6019   }
6020   // Rely on the type initialization to save everything we need.
6021   locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6022 }
6023 
6024 void LocationsBuilderX86_64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
6025   // Custom calling convention: RAX serves as both input and output.
6026   Location location = Location::RegisterLocation(RAX);
6027   CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location);
6028 }
6029 
6030 void InstructionCodeGeneratorX86_64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
6031   codegen_->GenerateLoadMethodHandleRuntimeCall(load);
6032 }
6033 
6034 void LocationsBuilderX86_64::VisitLoadMethodType(HLoadMethodType* load) {
6035   // Custom calling convention: RAX serves as both input and output.
6036   Location location = Location::RegisterLocation(RAX);
6037   CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location);
6038 }
6039 
6040 void InstructionCodeGeneratorX86_64::VisitLoadMethodType(HLoadMethodType* load) {
6041   codegen_->GenerateLoadMethodTypeRuntimeCall(load);
6042 }
6043 
6044 void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) {
6045   // We assume the class is not null.
6046   SlowPathCode* slow_path =
6047       new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64(check->GetLoadClass(), check);
6048   codegen_->AddSlowPath(slow_path);
6049   GenerateClassInitializationCheck(slow_path,
6050                                    check->GetLocations()->InAt(0).AsRegister<CpuRegister>());
6051 }
6052 
6053 HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind(
6054     HLoadString::LoadKind desired_string_load_kind) {
6055   switch (desired_string_load_kind) {
6056     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
6057     case HLoadString::LoadKind::kBootImageRelRo:
6058     case HLoadString::LoadKind::kBssEntry:
6059       DCHECK(!Runtime::Current()->UseJitCompilation());
6060       break;
6061     case HLoadString::LoadKind::kJitBootImageAddress:
6062     case HLoadString::LoadKind::kJitTableAddress:
6063       DCHECK(Runtime::Current()->UseJitCompilation());
6064       break;
6065     case HLoadString::LoadKind::kRuntimeCall:
6066       break;
6067   }
6068   return desired_string_load_kind;
6069 }
6070 
6071 void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) {
6072   LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
6073   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
6074   if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) {
6075     locations->SetOut(Location::RegisterLocation(RAX));
6076   } else {
6077     locations->SetOut(Location::RequiresRegister());
6078     if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
6079       if (!kUseReadBarrier || kUseBakerReadBarrier) {
6080         // Rely on the pResolveString to save everything.
6081         locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6082       } else {
6083         // For non-Baker read barrier we have a temp-clobbering call.
6084       }
6085     }
6086   }
6087 }
6088 
6089 Label* CodeGeneratorX86_64::NewJitRootStringPatch(const DexFile& dex_file,
6090                                                   dex::StringIndex string_index,
6091                                                   Handle<mirror::String> handle) {
6092   ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
6093   // Add a patch entry and return the label.
6094   jit_string_patches_.emplace_back(&dex_file, string_index.index_);
6095   PatchInfo<Label>* info = &jit_string_patches_.back();
6096   return &info->label;
6097 }
6098 
6099 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
6100 // move.
6101 void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
6102   LocationSummary* locations = load->GetLocations();
6103   Location out_loc = locations->Out();
6104   CpuRegister out = out_loc.AsRegister<CpuRegister>();
6105 
6106   switch (load->GetLoadKind()) {
6107     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
6108       DCHECK(codegen_->GetCompilerOptions().IsBootImage());
6109       __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false));
6110       codegen_->RecordBootImageStringPatch(load);
6111       return;
6112     }
6113     case HLoadString::LoadKind::kBootImageRelRo: {
6114       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
6115       __ movl(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false));
6116       codegen_->RecordBootImageRelRoPatch(codegen_->GetBootImageOffset(load));
6117       return;
6118     }
6119     case HLoadString::LoadKind::kBssEntry: {
6120       Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
6121                                           /* no_rip= */ false);
6122       Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
6123       // /* GcRoot<mirror::Class> */ out = *address  /* PC-relative */
6124       GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
6125       SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86_64(load);
6126       codegen_->AddSlowPath(slow_path);
6127       __ testl(out, out);
6128       __ j(kEqual, slow_path->GetEntryLabel());
6129       __ Bind(slow_path->GetExitLabel());
6130       return;
6131     }
6132     case HLoadString::LoadKind::kJitBootImageAddress: {
6133       uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get());
6134       DCHECK_NE(address, 0u);
6135       __ movl(out, Immediate(static_cast<int32_t>(address)));  // Zero-extended.
6136       return;
6137     }
6138     case HLoadString::LoadKind::kJitTableAddress: {
6139       Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
6140                                           /* no_rip= */ true);
6141       Label* fixup_label = codegen_->NewJitRootStringPatch(
6142           load->GetDexFile(), load->GetStringIndex(), load->GetString());
6143       // /* GcRoot<mirror::String> */ out = *address
6144       GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
6145       return;
6146     }
6147     default:
6148       break;
6149   }
6150 
6151   // TODO: Re-add the compiler code to do string dex cache lookup again.
6152   // Custom calling convention: RAX serves as both input and output.
6153   __ movl(CpuRegister(RAX), Immediate(load->GetStringIndex().index_));
6154   codegen_->InvokeRuntime(kQuickResolveString,
6155                           load,
6156                           load->GetDexPc());
6157   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
6158 }
6159 
6160 static Address GetExceptionTlsAddress() {
6161   return Address::Absolute(Thread::ExceptionOffset<kX86_64PointerSize>().Int32Value(),
6162                            /* no_rip= */ true);
6163 }
6164 
6165 void LocationsBuilderX86_64::VisitLoadException(HLoadException* load) {
6166   LocationSummary* locations =
6167       new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall);
6168   locations->SetOut(Location::RequiresRegister());
6169 }
6170 
6171 void InstructionCodeGeneratorX86_64::VisitLoadException(HLoadException* load) {
6172   __ gs()->movl(load->GetLocations()->Out().AsRegister<CpuRegister>(), GetExceptionTlsAddress());
6173 }
6174 
6175 void LocationsBuilderX86_64::VisitClearException(HClearException* clear) {
6176   new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
6177 }
6178 
6179 void InstructionCodeGeneratorX86_64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
6180   __ gs()->movl(GetExceptionTlsAddress(), Immediate(0));
6181 }
6182 
6183 void LocationsBuilderX86_64::VisitThrow(HThrow* instruction) {
6184   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6185       instruction, LocationSummary::kCallOnMainOnly);
6186   InvokeRuntimeCallingConvention calling_convention;
6187   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6188 }
6189 
6190 void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) {
6191   codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
6192   CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
6193 }
6194 
6195 // Temp is used for read barrier.
6196 static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
6197   if (kEmitCompilerReadBarrier &&
6198       !kUseBakerReadBarrier &&
6199       (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
6200        type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
6201        type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
6202     return 1;
6203   }
6204   return 0;
6205 }
6206 
6207 // The interface case has two temps: one holds the number of interfaces and one holds the
6208 // current interface pointer; the current interface is compared in memory.
6209 // The other checks have one temp for loading the object's class.
6210 static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
6211   if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
6212     return 2;
6213   }
6214   return 1 + NumberOfInstanceOfTemps(type_check_kind);
6215 }
6216 
6217 void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
6218   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
6219   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6220   bool baker_read_barrier_slow_path = false;
6221   switch (type_check_kind) {
6222     case TypeCheckKind::kExactCheck:
6223     case TypeCheckKind::kAbstractClassCheck:
6224     case TypeCheckKind::kClassHierarchyCheck:
6225     case TypeCheckKind::kArrayObjectCheck: {
6226       bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction);
6227       call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
6228       baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier;
6229       break;
6230     }
6231     case TypeCheckKind::kArrayCheck:
6232     case TypeCheckKind::kUnresolvedCheck:
6233     case TypeCheckKind::kInterfaceCheck:
6234       call_kind = LocationSummary::kCallOnSlowPath;
6235       break;
6236     case TypeCheckKind::kBitstringCheck:
6237       break;
6238   }
6239 
6240   LocationSummary* locations =
6241       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
6242   if (baker_read_barrier_slow_path) {
6243     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
6244   }
6245   locations->SetInAt(0, Location::RequiresRegister());
6246   if (type_check_kind == TypeCheckKind::kBitstringCheck) {
6247     locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
6248     locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
6249     locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
6250   } else {
6251     locations->SetInAt(1, Location::Any());
6252   }
6253   // Note that TypeCheckSlowPathX86_64 uses this "out" register too.
6254   locations->SetOut(Location::RequiresRegister());
6255   locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
6256 }
6257 
6258 void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
6259   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6260   LocationSummary* locations = instruction->GetLocations();
6261   Location obj_loc = locations->InAt(0);
6262   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
6263   Location cls = locations->InAt(1);
6264   Location out_loc = locations->Out();
6265   CpuRegister out = out_loc.AsRegister<CpuRegister>();
6266   const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
6267   DCHECK_LE(num_temps, 1u);
6268   Location maybe_temp_loc = (num_temps >= 1u) ? locations->GetTemp(0) : Location::NoLocation();
6269   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
6270   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
6271   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
6272   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
6273   SlowPathCode* slow_path = nullptr;
6274   NearLabel done, zero;
6275 
6276   // Return 0 if `obj` is null.
6277   // Avoid null check if we know obj is not null.
6278   if (instruction->MustDoNullCheck()) {
6279     __ testl(obj, obj);
6280     __ j(kEqual, &zero);
6281   }
6282 
6283   switch (type_check_kind) {
6284     case TypeCheckKind::kExactCheck: {
6285       ReadBarrierOption read_barrier_option =
6286           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
6287       // /* HeapReference<Class> */ out = obj->klass_
6288       GenerateReferenceLoadTwoRegisters(instruction,
6289                                         out_loc,
6290                                         obj_loc,
6291                                         class_offset,
6292                                         read_barrier_option);
6293       if (cls.IsRegister()) {
6294         __ cmpl(out, cls.AsRegister<CpuRegister>());
6295       } else {
6296         DCHECK(cls.IsStackSlot()) << cls;
6297         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
6298       }
6299       if (zero.IsLinked()) {
6300         // Classes must be equal for the instanceof to succeed.
6301         __ j(kNotEqual, &zero);
6302         __ movl(out, Immediate(1));
6303         __ jmp(&done);
6304       } else {
6305         __ setcc(kEqual, out);
6306         // setcc only sets the low byte.
6307         __ andl(out, Immediate(1));
6308       }
6309       break;
6310     }
6311 
6312     case TypeCheckKind::kAbstractClassCheck: {
6313       ReadBarrierOption read_barrier_option =
6314           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
6315       // /* HeapReference<Class> */ out = obj->klass_
6316       GenerateReferenceLoadTwoRegisters(instruction,
6317                                         out_loc,
6318                                         obj_loc,
6319                                         class_offset,
6320                                         read_barrier_option);
6321       // If the class is abstract, we eagerly fetch the super class of the
6322       // object to avoid doing a comparison we know will fail.
6323       NearLabel loop, success;
6324       __ Bind(&loop);
6325       // /* HeapReference<Class> */ out = out->super_class_
6326       GenerateReferenceLoadOneRegister(instruction,
6327                                        out_loc,
6328                                        super_offset,
6329                                        maybe_temp_loc,
6330                                        read_barrier_option);
6331       __ testl(out, out);
6332       // If `out` is null, we use it for the result, and jump to `done`.
6333       __ j(kEqual, &done);
6334       if (cls.IsRegister()) {
6335         __ cmpl(out, cls.AsRegister<CpuRegister>());
6336       } else {
6337         DCHECK(cls.IsStackSlot()) << cls;
6338         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
6339       }
6340       __ j(kNotEqual, &loop);
6341       __ movl(out, Immediate(1));
6342       if (zero.IsLinked()) {
6343         __ jmp(&done);
6344       }
6345       break;
6346     }
6347 
6348     case TypeCheckKind::kClassHierarchyCheck: {
6349       ReadBarrierOption read_barrier_option =
6350           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
6351       // /* HeapReference<Class> */ out = obj->klass_
6352       GenerateReferenceLoadTwoRegisters(instruction,
6353                                         out_loc,
6354                                         obj_loc,
6355                                         class_offset,
6356                                         read_barrier_option);
6357       // Walk over the class hierarchy to find a match.
6358       NearLabel loop, success;
6359       __ Bind(&loop);
6360       if (cls.IsRegister()) {
6361         __ cmpl(out, cls.AsRegister<CpuRegister>());
6362       } else {
6363         DCHECK(cls.IsStackSlot()) << cls;
6364         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
6365       }
6366       __ j(kEqual, &success);
6367       // /* HeapReference<Class> */ out = out->super_class_
6368       GenerateReferenceLoadOneRegister(instruction,
6369                                        out_loc,
6370                                        super_offset,
6371                                        maybe_temp_loc,
6372                                        read_barrier_option);
6373       __ testl(out, out);
6374       __ j(kNotEqual, &loop);
6375       // If `out` is null, we use it for the result, and jump to `done`.
6376       __ jmp(&done);
6377       __ Bind(&success);
6378       __ movl(out, Immediate(1));
6379       if (zero.IsLinked()) {
6380         __ jmp(&done);
6381       }
6382       break;
6383     }
6384 
6385     case TypeCheckKind::kArrayObjectCheck: {
6386       ReadBarrierOption read_barrier_option =
6387           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
6388       // /* HeapReference<Class> */ out = obj->klass_
6389       GenerateReferenceLoadTwoRegisters(instruction,
6390                                         out_loc,
6391                                         obj_loc,
6392                                         class_offset,
6393                                         read_barrier_option);
6394       // Do an exact check.
6395       NearLabel exact_check;
6396       if (cls.IsRegister()) {
6397         __ cmpl(out, cls.AsRegister<CpuRegister>());
6398       } else {
6399         DCHECK(cls.IsStackSlot()) << cls;
6400         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
6401       }
6402       __ j(kEqual, &exact_check);
6403       // Otherwise, we need to check that the object's class is a non-primitive array.
6404       // /* HeapReference<Class> */ out = out->component_type_
6405       GenerateReferenceLoadOneRegister(instruction,
6406                                        out_loc,
6407                                        component_offset,
6408                                        maybe_temp_loc,
6409                                        read_barrier_option);
6410       __ testl(out, out);
6411       // If `out` is null, we use it for the result, and jump to `done`.
6412       __ j(kEqual, &done);
6413       __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot));
6414       __ j(kNotEqual, &zero);
6415       __ Bind(&exact_check);
6416       __ movl(out, Immediate(1));
6417       __ jmp(&done);
6418       break;
6419     }
6420 
6421     case TypeCheckKind::kArrayCheck: {
6422       // No read barrier since the slow path will retry upon failure.
6423       // /* HeapReference<Class> */ out = obj->klass_
6424       GenerateReferenceLoadTwoRegisters(instruction,
6425                                         out_loc,
6426                                         obj_loc,
6427                                         class_offset,
6428                                         kWithoutReadBarrier);
6429       if (cls.IsRegister()) {
6430         __ cmpl(out, cls.AsRegister<CpuRegister>());
6431       } else {
6432         DCHECK(cls.IsStackSlot()) << cls;
6433         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
6434       }
6435       DCHECK(locations->OnlyCallsOnSlowPath());
6436       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
6437           instruction, /* is_fatal= */ false);
6438       codegen_->AddSlowPath(slow_path);
6439       __ j(kNotEqual, slow_path->GetEntryLabel());
6440       __ movl(out, Immediate(1));
6441       if (zero.IsLinked()) {
6442         __ jmp(&done);
6443       }
6444       break;
6445     }
6446 
6447     case TypeCheckKind::kUnresolvedCheck:
6448     case TypeCheckKind::kInterfaceCheck: {
6449       // Note that we indeed only call on slow path, but we always go
6450       // into the slow path for the unresolved and interface check
6451       // cases.
6452       //
6453       // We cannot directly call the InstanceofNonTrivial runtime
6454       // entry point without resorting to a type checking slow path
6455       // here (i.e. by calling InvokeRuntime directly), as it would
6456       // require assigning fixed registers to the inputs of this
6457       // HInstanceOf instruction (following the runtime calling
6458       // convention), which might be cluttered by the potential first
6459       // read barrier emission at the beginning of this method.
6460       //
6461       // TODO: Introduce a new runtime entry point taking the object
6462       // to test (instead of its class) as argument, and let it deal
6463       // with the read barrier issues. This will let us refactor this
6464       // case of the `switch` code as it was previously (with a direct
6465       // call to the runtime not using a type checking slow path).
6466       // This should also be beneficial for the other cases above.
6467       DCHECK(locations->OnlyCallsOnSlowPath());
6468       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
6469           instruction, /* is_fatal= */ false);
6470       codegen_->AddSlowPath(slow_path);
6471       __ jmp(slow_path->GetEntryLabel());
6472       if (zero.IsLinked()) {
6473         __ jmp(&done);
6474       }
6475       break;
6476     }
6477 
6478     case TypeCheckKind::kBitstringCheck: {
6479       // /* HeapReference<Class> */ temp = obj->klass_
6480       GenerateReferenceLoadTwoRegisters(instruction,
6481                                         out_loc,
6482                                         obj_loc,
6483                                         class_offset,
6484                                         kWithoutReadBarrier);
6485 
6486       GenerateBitstringTypeCheckCompare(instruction, out);
6487       if (zero.IsLinked()) {
6488         __ j(kNotEqual, &zero);
6489         __ movl(out, Immediate(1));
6490         __ jmp(&done);
6491       } else {
6492         __ setcc(kEqual, out);
6493         // setcc only sets the low byte.
6494         __ andl(out, Immediate(1));
6495       }
6496       break;
6497     }
6498   }
6499 
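  // Shared tail for the switch above: `zero` is bound only if some path (e.g. the
  // null check or a failed type test) needs to materialize "false", and `done` is
  // the common exit for paths that have already produced their result in `out`.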
6500   if (zero.IsLinked()) {
6501     __ Bind(&zero);
6502     __ xorl(out, out);
6503   }
6504 
6505   if (done.IsLinked()) {
6506     __ Bind(&done);
6507   }
6508 
6509   if (slow_path != nullptr) {
6510     __ Bind(slow_path->GetExitLabel());
6511   }
6512 }
6513 
6514 void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) {
6515   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6516   LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction);
6517   LocationSummary* locations =
6518       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
6519   locations->SetInAt(0, Location::RequiresRegister());
6520   if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
6521     // Require a register for the interface check since there is a loop that compares the class to
6522     // a memory address.
6523     locations->SetInAt(1, Location::RequiresRegister());
6524   } else if (type_check_kind == TypeCheckKind::kBitstringCheck) {
6525     locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
6526     locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
6527     locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
6528   } else {
6529     locations->SetInAt(1, Location::Any());
6530   }
6531   // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathX86_64.
6532   locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
6533 }
6534 
6535 void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
6536   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6537   LocationSummary* locations = instruction->GetLocations();
6538   Location obj_loc = locations->InAt(0);
6539   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
6540   Location cls = locations->InAt(1);
6541   Location temp_loc = locations->GetTemp(0);
6542   CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
6543   const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
6544   DCHECK_GE(num_temps, 1u);
6545   DCHECK_LE(num_temps, 2u);
6546   Location maybe_temp2_loc = (num_temps >= 2u) ? locations->GetTemp(1) : Location::NoLocation();
6547   const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
6548   const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
6549   const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
6550   const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
6551   const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
6552   const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
6553   const uint32_t object_array_data_offset =
6554       mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
6555 
6556   bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction);
6557   SlowPathCode* type_check_slow_path =
6558       new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
6559           instruction, is_type_check_slow_path_fatal);
6560   codegen_->AddSlowPath(type_check_slow_path);
6561 
6562 
6563   NearLabel done;
6564   // Avoid null check if we know obj is not null.
6565   if (instruction->MustDoNullCheck()) {
6566     __ testl(obj, obj);
6567     __ j(kEqual, &done);
6568   }
6569 
6570   switch (type_check_kind) {
6571     case TypeCheckKind::kExactCheck:
6572     case TypeCheckKind::kArrayCheck: {
6573       // /* HeapReference<Class> */ temp = obj->klass_
6574       GenerateReferenceLoadTwoRegisters(instruction,
6575                                         temp_loc,
6576                                         obj_loc,
6577                                         class_offset,
6578                                         kWithoutReadBarrier);
6579       if (cls.IsRegister()) {
6580         __ cmpl(temp, cls.AsRegister<CpuRegister>());
6581       } else {
6582         DCHECK(cls.IsStackSlot()) << cls;
6583         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
6584       }
6585       // Jump to slow path for throwing the exception or doing a
6586       // more involved array check.
6587       __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
6588       break;
6589     }
6590 
6591     case TypeCheckKind::kAbstractClassCheck: {
6592       // /* HeapReference<Class> */ temp = obj->klass_
6593       GenerateReferenceLoadTwoRegisters(instruction,
6594                                         temp_loc,
6595                                         obj_loc,
6596                                         class_offset,
6597                                         kWithoutReadBarrier);
6598       // If the class is abstract, we eagerly fetch the super class of the
6599       // object to avoid doing a comparison we know will fail.
6600       NearLabel loop;
6601       __ Bind(&loop);
6602       // /* HeapReference<Class> */ temp = temp->super_class_
6603       GenerateReferenceLoadOneRegister(instruction,
6604                                        temp_loc,
6605                                        super_offset,
6606                                        maybe_temp2_loc,
6607                                        kWithoutReadBarrier);
6608 
6609       // If the class reference currently in `temp` is null, jump to the slow path to throw the
6610       // exception.
6611       __ testl(temp, temp);
6612       // Otherwise, compare the classes.
6613       __ j(kZero, type_check_slow_path->GetEntryLabel());
6614       if (cls.IsRegister()) {
6615         __ cmpl(temp, cls.AsRegister<CpuRegister>());
6616       } else {
6617         DCHECK(cls.IsStackSlot()) << cls;
6618         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
6619       }
6620       __ j(kNotEqual, &loop);
6621       break;
6622     }
6623 
6624     case TypeCheckKind::kClassHierarchyCheck: {
6625       // /* HeapReference<Class> */ temp = obj->klass_
6626       GenerateReferenceLoadTwoRegisters(instruction,
6627                                         temp_loc,
6628                                         obj_loc,
6629                                         class_offset,
6630                                         kWithoutReadBarrier);
6631       // Walk over the class hierarchy to find a match.
6632       NearLabel loop;
6633       __ Bind(&loop);
6634       if (cls.IsRegister()) {
6635         __ cmpl(temp, cls.AsRegister<CpuRegister>());
6636       } else {
6637         DCHECK(cls.IsStackSlot()) << cls;
6638         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
6639       }
6640       __ j(kEqual, &done);
6641 
6642       // /* HeapReference<Class> */ temp = temp->super_class_
6643       GenerateReferenceLoadOneRegister(instruction,
6644                                        temp_loc,
6645                                        super_offset,
6646                                        maybe_temp2_loc,
6647                                        kWithoutReadBarrier);
6648 
6649       // If the class reference currently in `temp` is not null, jump
6650       // back at the beginning of the loop.
6651       __ testl(temp, temp);
6652       __ j(kNotZero, &loop);
6653       // Otherwise, jump to the slow path to throw the exception.
6654       __ jmp(type_check_slow_path->GetEntryLabel());
6655       break;
6656     }
6657 
6658     case TypeCheckKind::kArrayObjectCheck: {
6659       // /* HeapReference<Class> */ temp = obj->klass_
6660       GenerateReferenceLoadTwoRegisters(instruction,
6661                                         temp_loc,
6662                                         obj_loc,
6663                                         class_offset,
6664                                         kWithoutReadBarrier);
6665       // Do an exact check.
6667       if (cls.IsRegister()) {
6668         __ cmpl(temp, cls.AsRegister<CpuRegister>());
6669       } else {
6670         DCHECK(cls.IsStackSlot()) << cls;
6671         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
6672       }
6673       __ j(kEqual, &done);
6674 
6675       // Otherwise, we need to check that the object's class is a non-primitive array.
6676       // /* HeapReference<Class> */ temp = temp->component_type_
6677       GenerateReferenceLoadOneRegister(instruction,
6678                                        temp_loc,
6679                                        component_offset,
6680                                        maybe_temp2_loc,
6681                                        kWithoutReadBarrier);
6682 
6683       // If the component type is null, the object is not an array: jump to the
6684       // slow path to throw the exception. Otherwise, fall through and check
6685       // that this component type is not a primitive type.
6686       __ testl(temp, temp);
6687       __ j(kZero, type_check_slow_path->GetEntryLabel());
6690       __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
6691       __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
6692       break;
6693     }
6694 
6695     case TypeCheckKind::kUnresolvedCheck: {
6696       // We always go into the type check slow path for the unresolved case.
6697       //
6698       // We cannot directly call the CheckCast runtime entry point
6699       // without resorting to a type checking slow path here (i.e. by
6700       // calling InvokeRuntime directly), as it would require
6701       // assigning fixed registers to the inputs of this HCheckCast
6702       // instruction (following the runtime calling convention), which
6703       // might be cluttered by the potential first read barrier
6704       // emission at the beginning of this method.
6705       __ jmp(type_check_slow_path->GetEntryLabel());
6706       break;
6707     }
6708 
6709     case TypeCheckKind::kInterfaceCheck: {
6710       // Fast path for the interface check. Try to avoid read barriers to improve the fast path.
6711       // We cannot get false positives by doing this.
6712       // /* HeapReference<Class> */ temp = obj->klass_
6713       GenerateReferenceLoadTwoRegisters(instruction,
6714                                         temp_loc,
6715                                         obj_loc,
6716                                         class_offset,
6717                                         kWithoutReadBarrier);
6718 
6719       // /* HeapReference<Class> */ temp = temp->iftable_
6720       GenerateReferenceLoadTwoRegisters(instruction,
6721                                         temp_loc,
6722                                         temp_loc,
6723                                         iftable_offset,
6724                                         kWithoutReadBarrier);
6725       // Iftable is never null.
6726       __ movl(maybe_temp2_loc.AsRegister<CpuRegister>(), Address(temp, array_length_offset));
6727       // Maybe poison the `cls` for direct comparison with memory.
6728       __ MaybePoisonHeapReference(cls.AsRegister<CpuRegister>());
6729       // Loop through the iftable and check if any class matches.
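      // Note: the iftable is assumed to store (interface class, method array) pairs
      // flattened into a single object array, so its length is twice the number of
      // implemented interfaces; the counter is therefore decremented by 2 per
      // iteration and, once decremented, indexes the class entry of a pair via
      // 4-byte heap references.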
6730       NearLabel start_loop;
6731       __ Bind(&start_loop);
6732       // Need to subtract first to handle the empty array case.
6733       __ subl(maybe_temp2_loc.AsRegister<CpuRegister>(), Immediate(2));
6734       __ j(kNegative, type_check_slow_path->GetEntryLabel());
6735       // Go to next interface if the classes do not match.
6736       __ cmpl(cls.AsRegister<CpuRegister>(),
6737               CodeGeneratorX86_64::ArrayAddress(temp,
6738                                                 maybe_temp2_loc,
6739                                                 TIMES_4,
6740                                                 object_array_data_offset));
6741       __ j(kNotEqual, &start_loop);  // Fall through (success) if the classes match.
6742       // If `cls` was poisoned above, unpoison it.
6743       __ MaybeUnpoisonHeapReference(cls.AsRegister<CpuRegister>());
6744       break;
6745     }
6746 
6747     case TypeCheckKind::kBitstringCheck: {
6748       // /* HeapReference<Class> */ temp = obj->klass_
6749       GenerateReferenceLoadTwoRegisters(instruction,
6750                                         temp_loc,
6751                                         obj_loc,
6752                                         class_offset,
6753                                         kWithoutReadBarrier);
6754 
6755       GenerateBitstringTypeCheckCompare(instruction, temp);
6756       __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
6757       break;
6758     }
6759   }
6760 
6761   if (done.IsLinked()) {
6762     __ Bind(&done);
6763   }
6764 
6765   __ Bind(type_check_slow_path->GetExitLabel());
6766 }
6767 
6768 void LocationsBuilderX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
6769   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6770       instruction, LocationSummary::kCallOnMainOnly);
6771   InvokeRuntimeCallingConvention calling_convention;
6772   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6773 }
6774 
6775 void InstructionCodeGeneratorX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
6776   codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
6777                           instruction,
6778                           instruction->GetDexPc());
6779   if (instruction->IsEnter()) {
6780     CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
6781   } else {
6782     CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
6783   }
6784 }
6785 
6786 void LocationsBuilderX86_64::VisitX86AndNot(HX86AndNot* instruction) {
6787   DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
6788   DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType();
6789   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
6790   locations->SetInAt(0, Location::RequiresRegister());
6791   // There is no immediate variant of the and-not (andn) instruction on x86.
6792   locations->SetInAt(1, Location::RequiresRegister());
6793   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6794 }
6795 
6796 void LocationsBuilderX86_64::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) {
6797   DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
6798   DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType();
6799   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
6800   locations->SetInAt(0, Location::RequiresRegister());
6801   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6802 }
6803 
6804 void InstructionCodeGeneratorX86_64::VisitX86AndNot(HX86AndNot* instruction) {
6805   LocationSummary* locations = instruction->GetLocations();
6806   Location first = locations->InAt(0);
6807   Location second = locations->InAt(1);
6808   Location dest = locations->Out();
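  // For reference, the BMI1 `andn` instruction computes dest = ~src1 & src2 (the
  // first source is inverted), assuming the assembler's operand order follows the
  // hardware encoding.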
6809   __ andn(dest.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
6810 }
6811 
6812 void InstructionCodeGeneratorX86_64::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) {
6813   LocationSummary* locations = instruction->GetLocations();
6814   Location src = locations->InAt(0);
6815   Location dest = locations->Out();
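  // For reference, the BMI1 instructions used below compute:
  //   blsr   dest, src  =>  dest = src & (src - 1)   (reset lowest set bit)
  //   blsmsk dest, src  =>  dest = src ^ (src - 1)   (mask up to lowest set bit)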
6816   switch (instruction->GetOpKind()) {
6817     case HInstruction::kAnd:
6818       __ blsr(dest.AsRegister<CpuRegister>(), src.AsRegister<CpuRegister>());
6819       break;
6820     case HInstruction::kXor:
6821       __ blsmsk(dest.AsRegister<CpuRegister>(), src.AsRegister<CpuRegister>());
6822       break;
6823     default:
6824       LOG(FATAL) << "Unreachable";
6825   }
6826 }
6827 
6828 void LocationsBuilderX86_64::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
6829 void LocationsBuilderX86_64::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); }
6830 void LocationsBuilderX86_64::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); }
6831 
6832 void LocationsBuilderX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
6833   LocationSummary* locations =
6834       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
6835   DCHECK(instruction->GetResultType() == DataType::Type::kInt32
6836          || instruction->GetResultType() == DataType::Type::kInt64);
6837   locations->SetInAt(0, Location::RequiresRegister());
6838   locations->SetInAt(1, Location::Any());
6839   locations->SetOut(Location::SameAsFirstInput());
6840 }
6841 
6842 void InstructionCodeGeneratorX86_64::VisitAnd(HAnd* instruction) {
6843   HandleBitwiseOperation(instruction);
6844 }
6845 
6846 void InstructionCodeGeneratorX86_64::VisitOr(HOr* instruction) {
6847   HandleBitwiseOperation(instruction);
6848 }
6849 
6850 void InstructionCodeGeneratorX86_64::VisitXor(HXor* instruction) {
6851   HandleBitwiseOperation(instruction);
6852 }
6853 
6854 void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
6855   LocationSummary* locations = instruction->GetLocations();
6856   Location first = locations->InAt(0);
6857   Location second = locations->InAt(1);
6858   DCHECK(first.Equals(locations->Out()));
6859 
6860   if (instruction->GetResultType() == DataType::Type::kInt32) {
6861     if (second.IsRegister()) {
6862       if (instruction->IsAnd()) {
6863         __ andl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
6864       } else if (instruction->IsOr()) {
6865         __ orl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
6866       } else {
6867         DCHECK(instruction->IsXor());
6868         __ xorl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
6869       }
6870     } else if (second.IsConstant()) {
6871       Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
6872       if (instruction->IsAnd()) {
6873         __ andl(first.AsRegister<CpuRegister>(), imm);
6874       } else if (instruction->IsOr()) {
6875         __ orl(first.AsRegister<CpuRegister>(), imm);
6876       } else {
6877         DCHECK(instruction->IsXor());
6878         __ xorl(first.AsRegister<CpuRegister>(), imm);
6879       }
6880     } else {
6881       Address address(CpuRegister(RSP), second.GetStackIndex());
6882       if (instruction->IsAnd()) {
6883         __ andl(first.AsRegister<CpuRegister>(), address);
6884       } else if (instruction->IsOr()) {
6885         __ orl(first.AsRegister<CpuRegister>(), address);
6886       } else {
6887         DCHECK(instruction->IsXor());
6888         __ xorl(first.AsRegister<CpuRegister>(), address);
6889       }
6890     }
6891   } else {
6892     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
6893     CpuRegister first_reg = first.AsRegister<CpuRegister>();
6894     bool second_is_constant = false;
6895     int64_t value = 0;
6896     if (second.IsConstant()) {
6897       second_is_constant = true;
6898       value = second.GetConstant()->AsLongConstant()->GetValue();
6899     }
6900     bool is_int32_value = IsInt<32>(value);
6901 
6902     if (instruction->IsAnd()) {
6903       if (second_is_constant) {
6904         if (is_int32_value) {
6905           __ andq(first_reg, Immediate(static_cast<int32_t>(value)));
6906         } else {
6907           __ andq(first_reg, codegen_->LiteralInt64Address(value));
6908         }
6909       } else if (second.IsDoubleStackSlot()) {
6910         __ andq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
6911       } else {
6912         __ andq(first_reg, second.AsRegister<CpuRegister>());
6913       }
6914     } else if (instruction->IsOr()) {
6915       if (second_is_constant) {
6916         if (is_int32_value) {
6917           __ orq(first_reg, Immediate(static_cast<int32_t>(value)));
6918         } else {
6919           __ orq(first_reg, codegen_->LiteralInt64Address(value));
6920         }
6921       } else if (second.IsDoubleStackSlot()) {
6922         __ orq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
6923       } else {
6924         __ orq(first_reg, second.AsRegister<CpuRegister>());
6925       }
6926     } else {
6927       DCHECK(instruction->IsXor());
6928       if (second_is_constant) {
6929         if (is_int32_value) {
6930           __ xorq(first_reg, Immediate(static_cast<int32_t>(value)));
6931         } else {
6932           __ xorq(first_reg, codegen_->LiteralInt64Address(value));
6933         }
6934       } else if (second.IsDoubleStackSlot()) {
6935         __ xorq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
6936       } else {
6937         __ xorq(first_reg, second.AsRegister<CpuRegister>());
6938       }
6939     }
6940   }
6941 }
6942 
6943 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(
6944     HInstruction* instruction,
6945     Location out,
6946     uint32_t offset,
6947     Location maybe_temp,
6948     ReadBarrierOption read_barrier_option) {
6949   CpuRegister out_reg = out.AsRegister<CpuRegister>();
6950   if (read_barrier_option == kWithReadBarrier) {
6951     CHECK(kEmitCompilerReadBarrier);
6952     if (kUseBakerReadBarrier) {
6953       // Load with fast path based Baker's read barrier.
6954       // /* HeapReference<Object> */ out = *(out + offset)
6955       codegen_->GenerateFieldLoadWithBakerReadBarrier(
6956           instruction, out, out_reg, offset, /* needs_null_check= */ false);
6957     } else {
6958       // Load with slow path based read barrier.
6959       // Save the value of `out` into `maybe_temp` before overwriting it
6960       // in the following move operation, as we will need it for the
6961       // read barrier below.
6962       DCHECK(maybe_temp.IsRegister()) << maybe_temp;
6963       __ movl(maybe_temp.AsRegister<CpuRegister>(), out_reg);
6964       // /* HeapReference<Object> */ out = *(out + offset)
6965       __ movl(out_reg, Address(out_reg, offset));
6966       codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
6967     }
6968   } else {
6969     // Plain load with no read barrier.
6970     // /* HeapReference<Object> */ out = *(out + offset)
6971     __ movl(out_reg, Address(out_reg, offset));
6972     __ MaybeUnpoisonHeapReference(out_reg);
6973   }
6974 }
6975 
6976 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(
6977     HInstruction* instruction,
6978     Location out,
6979     Location obj,
6980     uint32_t offset,
6981     ReadBarrierOption read_barrier_option) {
6982   CpuRegister out_reg = out.AsRegister<CpuRegister>();
6983   CpuRegister obj_reg = obj.AsRegister<CpuRegister>();
6984   if (read_barrier_option == kWithReadBarrier) {
6985     CHECK(kEmitCompilerReadBarrier);
6986     if (kUseBakerReadBarrier) {
6987       // Load with fast path based Baker's read barrier.
6988       // /* HeapReference<Object> */ out = *(obj + offset)
6989       codegen_->GenerateFieldLoadWithBakerReadBarrier(
6990           instruction, out, obj_reg, offset, /* needs_null_check= */ false);
6991     } else {
6992       // Load with slow path based read barrier.
6993       // /* HeapReference<Object> */ out = *(obj + offset)
6994       __ movl(out_reg, Address(obj_reg, offset));
6995       codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
6996     }
6997   } else {
6998     // Plain load with no read barrier.
6999     // /* HeapReference<Object> */ out = *(obj + offset)
7000     __ movl(out_reg, Address(obj_reg, offset));
7001     __ MaybeUnpoisonHeapReference(out_reg);
7002   }
7003 }
7004 
7005 void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(
7006     HInstruction* instruction,
7007     Location root,
7008     const Address& address,
7009     Label* fixup_label,
7010     ReadBarrierOption read_barrier_option) {
7011   CpuRegister root_reg = root.AsRegister<CpuRegister>();
7012   if (read_barrier_option == kWithReadBarrier) {
7013     DCHECK(kEmitCompilerReadBarrier);
7014     if (kUseBakerReadBarrier) {
7015       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
7016       // Baker's read barriers are used:
7017       //
7018       //   root = obj.field;
7019       //   temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
7020       //   if (temp != null) {
7021       //     root = temp(root)
7022       //   }
7023 
7024       // /* GcRoot<mirror::Object> */ root = *address
7025       __ movl(root_reg, address);
7026       if (fixup_label != nullptr) {
7027         __ Bind(fixup_label);
7028       }
7029       static_assert(
7030           sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
7031           "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
7032           "have different sizes.");
7033       static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
7034                     "art::mirror::CompressedReference<mirror::Object> and int32_t "
7035                     "have different sizes.");
7036 
7037       // Slow path marking the GC root `root`.
7038       SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathX86_64(
7039           instruction, root, /* unpoison_ref_before_marking= */ false);
7040       codegen_->AddSlowPath(slow_path);
7041 
7042       // Test the `Thread::Current()->pReadBarrierMarkReg ## root.reg()` entrypoint.
7043       const int32_t entry_point_offset =
7044           Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(root.reg());
7045       __ gs()->cmpl(Address::Absolute(entry_point_offset, /* no_rip= */ true), Immediate(0));
7046       // The entrypoint is null when the GC is not marking.
7047       __ j(kNotEqual, slow_path->GetEntryLabel());
7048       __ Bind(slow_path->GetExitLabel());
7049     } else {
7050       // GC root loaded through a slow path for read barriers other
7051       // than Baker's.
7052       // /* GcRoot<mirror::Object>* */ root = address
7053       __ leaq(root_reg, address);
7054       if (fixup_label != nullptr) {
7055         __ Bind(fixup_label);
7056       }
7057       // /* mirror::Object* */ root = root->Read()
7058       codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
7059     }
7060   } else {
7061     // Plain GC root load with no read barrier.
7062     // /* GcRoot<mirror::Object> */ root = *address
7063     __ movl(root_reg, address);
7064     if (fixup_label != nullptr) {
7065       __ Bind(fixup_label);
7066     }
7067     // Note that GC roots are not affected by heap poisoning, thus we
7068     // do not have to unpoison `root_reg` here.
7069   }
7070 }
7071 
7072 void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
7073                                                                 Location ref,
7074                                                                 CpuRegister obj,
7075                                                                 uint32_t offset,
7076                                                                 bool needs_null_check) {
7077   DCHECK(kEmitCompilerReadBarrier);
7078   DCHECK(kUseBakerReadBarrier);
7079 
7080   // /* HeapReference<Object> */ ref = *(obj + offset)
7081   Address src(obj, offset);
7082   GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
7083 }
7084 
7085 void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
7086                                                                 Location ref,
7087                                                                 CpuRegister obj,
7088                                                                 uint32_t data_offset,
7089                                                                 Location index,
7090                                                                 bool needs_null_check) {
7091   DCHECK(kEmitCompilerReadBarrier);
7092   DCHECK(kUseBakerReadBarrier);
7093 
7094   static_assert(
7095       sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
7096       "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
7097   // /* HeapReference<Object> */ ref =
7098   //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
7099   Address src = CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset);
7100   GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
7101 }
7102 
7103 void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
7104                                                                     Location ref,
7105                                                                     CpuRegister obj,
7106                                                                     const Address& src,
7107                                                                     bool needs_null_check,
7108                                                                     bool always_update_field,
7109                                                                     CpuRegister* temp1,
7110                                                                     CpuRegister* temp2) {
7111   DCHECK(kEmitCompilerReadBarrier);
7112   DCHECK(kUseBakerReadBarrier);
7113 
7114   // In slow path based read barriers, the read barrier call is
7115   // inserted after the original load. However, in fast path based
7116   // Baker's read barriers, we need to perform the load of
7117   // mirror::Object::monitor_ *before* the original reference load.
7118   // This load-load ordering is required by the read barrier.
7119   // The fast path/slow path (for Baker's algorithm) should look like:
7120   //
7121   //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
7122   //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
7123   //   HeapReference<Object> ref = *src;  // Original reference load.
7124   //   bool is_gray = (rb_state == ReadBarrier::GrayState());
7125   //   if (is_gray) {
7126   //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
7127   //   }
7128   //
7129   // Note: the original implementation in ReadBarrier::Barrier is
7130   // slightly more complex as:
7131   // - it implements the load-load fence using a data dependency on
7132   //   the high-bits of rb_state, which are expected to be all zeroes
7133   //   (we use CodeGeneratorX86_64::GenerateMemoryBarrier instead
7134   //   here, which is a no-op thanks to the x86-64 memory model);
7135   // - it performs additional checks that we do not do here for
7136   //   performance reasons.
7137 
7138   CpuRegister ref_reg = ref.AsRegister<CpuRegister>();
7139   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
7140 
7141   // Given the numeric representation, it's enough to check the low bit of the rb_state.
7142   static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
7143   static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
7144   constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
7145   constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
7146   constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
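  // Purely illustrative example: if kReadBarrierStateShift were 28, this would test
  // bit 4 of byte 3 of the lock word, i.e. `testb` with value 0x10 against the most
  // significant byte of obj->monitor_.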
7147 
7148   // if (rb_state == ReadBarrier::GrayState())
7149   //   ref = ReadBarrier::Mark(ref);
7150   // At this point, just do the "if" and make sure that flags are preserved until the branch.
7151   __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
7152   if (needs_null_check) {
7153     MaybeRecordImplicitNullCheck(instruction);
7154   }
7155 
7156   // Load fence to prevent load-load reordering.
7157   // Note that this is a no-op, thanks to the x86-64 memory model.
7158   GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
7159 
7160   // The actual reference load.
7161   // /* HeapReference<Object> */ ref = *src
7162   __ movl(ref_reg, src);  // Flags are unaffected.
7163 
7164   // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch.
7165   // Slow path marking the object `ref` when it is gray.
7166   SlowPathCode* slow_path;
7167   if (always_update_field) {
7168     DCHECK(temp1 != nullptr);
7169     DCHECK(temp2 != nullptr);
7170     slow_path = new (GetScopedAllocator()) ReadBarrierMarkAndUpdateFieldSlowPathX86_64(
7171         instruction, ref, obj, src, /* unpoison_ref_before_marking= */ true, *temp1, *temp2);
7172   } else {
7173     slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathX86_64(
7174         instruction, ref, /* unpoison_ref_before_marking= */ true);
7175   }
7176   AddSlowPath(slow_path);
7177 
7178   // We have done the "if" of the gray bit check above, now branch based on the flags.
7179   __ j(kNotZero, slow_path->GetEntryLabel());
7180 
7181   // Object* ref = ref_addr->AsMirrorPtr()
7182   __ MaybeUnpoisonHeapReference(ref_reg);
7183 
7184   __ Bind(slow_path->GetExitLabel());
7185 }
7186 
7187 void CodeGeneratorX86_64::GenerateReadBarrierSlow(HInstruction* instruction,
7188                                                   Location out,
7189                                                   Location ref,
7190                                                   Location obj,
7191                                                   uint32_t offset,
7192                                                   Location index) {
7193   DCHECK(kEmitCompilerReadBarrier);
7194 
7195   // Insert a slow path based read barrier *after* the reference load.
7196   //
7197   // If heap poisoning is enabled, the unpoisoning of the loaded
7198   // reference will be carried out by the runtime within the slow
7199   // path.
7200   //
7201   // Note that `ref` currently does not get unpoisoned (when heap
7202   // poisoning is enabled), which is alright as the `ref` argument is
7203   // not used by the artReadBarrierSlow entry point.
7204   //
7205   // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
7206   SlowPathCode* slow_path = new (GetScopedAllocator())
7207       ReadBarrierForHeapReferenceSlowPathX86_64(instruction, out, ref, obj, offset, index);
7208   AddSlowPath(slow_path);
7209 
7210   __ jmp(slow_path->GetEntryLabel());
7211   __ Bind(slow_path->GetExitLabel());
7212 }
7213 
7214 void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
7215                                                        Location out,
7216                                                        Location ref,
7217                                                        Location obj,
7218                                                        uint32_t offset,
7219                                                        Location index) {
7220   if (kEmitCompilerReadBarrier) {
7221     // Baker's read barriers shall be handled by the fast path
7222     // (CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier).
7223     DCHECK(!kUseBakerReadBarrier);
7224     // If heap poisoning is enabled, unpoisoning will be taken care of
7225     // by the runtime within the slow path.
7226     GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
7227   } else if (kPoisonHeapReferences) {
7228     __ UnpoisonHeapReference(out.AsRegister<CpuRegister>());
7229   }
7230 }
7231 
7232 void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
7233                                                          Location out,
7234                                                          Location root) {
7235   DCHECK(kEmitCompilerReadBarrier);
7236 
7237   // Insert a slow path based read barrier *after* the GC root load.
7238   //
7239   // Note that GC roots are not affected by heap poisoning, so we do
7240   // not need to do anything special for this here.
7241   SlowPathCode* slow_path =
7242       new (GetScopedAllocator()) ReadBarrierForRootSlowPathX86_64(instruction, out, root);
7243   AddSlowPath(slow_path);
7244 
7245   __ jmp(slow_path->GetEntryLabel());
7246   __ Bind(slow_path->GetExitLabel());
7247 }
7248 
7249 void LocationsBuilderX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
7250   // Nothing to do, this should be removed during prepare for register allocator.
7251   LOG(FATAL) << "Unreachable";
7252 }
7253 
7254 void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
7255   // Nothing to do, this should be removed during prepare for register allocator.
7256   LOG(FATAL) << "Unreachable";
7257 }
7258 
7259 // Simple implementation of packed switch - generate cascaded compare/jumps.
7260 void LocationsBuilderX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
7261   LocationSummary* locations =
7262       new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
7263   locations->SetInAt(0, Location::RequiresRegister());
7264   locations->AddTemp(Location::RequiresRegister());
7265   locations->AddTemp(Location::RequiresRegister());
7266 }
7267 
7268 void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
7269   int32_t lower_bound = switch_instr->GetStartValue();
7270   uint32_t num_entries = switch_instr->GetNumEntries();
7271   LocationSummary* locations = switch_instr->GetLocations();
7272   CpuRegister value_reg_in = locations->InAt(0).AsRegister<CpuRegister>();
7273   CpuRegister temp_reg = locations->GetTemp(0).AsRegister<CpuRegister>();
7274   CpuRegister base_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
7275   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
7276 
7277   // Should we generate smaller inline compare/jumps?
7278   if (num_entries <= kPackedSwitchJumpTableThreshold) {
7279     // Figure out the correct compare values and jump conditions.
7280     // Handle the first compare/branch as a special case because it might
7281     // jump to the default case.
7282     DCHECK_GT(num_entries, 2u);
7283     Condition first_condition;
7284     uint32_t index;
7285     const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
7286     if (lower_bound != 0) {
7287       first_condition = kLess;
7288       __ cmpl(value_reg_in, Immediate(lower_bound));
7289       __ j(first_condition, codegen_->GetLabelOf(default_block));
7290       __ j(kEqual, codegen_->GetLabelOf(successors[0]));
7291 
7292       index = 1;
7293     } else {
7294       // Handle all the compare/jumps below.
7295       first_condition = kBelow;
7296       index = 0;
7297     }
7298 
7299     // Handle the rest of the compare/jumps.
7300     for (; index + 1 < num_entries; index += 2) {
7301       int32_t compare_to_value = lower_bound + index + 1;
7302       __ cmpl(value_reg_in, Immediate(compare_to_value));
7303       // Jump to successors[index] if value < case_value[index].
7304       __ j(first_condition, codegen_->GetLabelOf(successors[index]));
7305       // Jump to successors[index + 1] if value == case_value[index + 1].
7306       __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
7307     }
7308 
7309     if (index != num_entries) {
7310       // There are an odd number of entries. Handle the last one.
7311       DCHECK_EQ(index + 1, num_entries);
7312       __ cmpl(value_reg_in, Immediate(static_cast<int32_t>(lower_bound + index)));
7313       __ j(kEqual, codegen_->GetLabelOf(successors[index]));
7314     }
7315 
7316     // And the default for any other value.
7317     if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
7318       __ jmp(codegen_->GetLabelOf(default_block));
7319     }
7320     return;
7321   }
7322 
7323   // Remove the bias, if needed.
7324   Register value_reg_out = value_reg_in.AsRegister();
7325   if (lower_bound != 0) {
7326     __ leal(temp_reg, Address(value_reg_in, -lower_bound));
7327     value_reg_out = temp_reg.AsRegister();
7328   }
7329   CpuRegister value_reg(value_reg_out);
7330 
7331   // Is the value in range?
7332   __ cmpl(value_reg, Immediate(num_entries - 1));
7333   __ j(kAbove, codegen_->GetLabelOf(default_block));
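  // A single unsigned comparison also rejects values below the lower bound: after any
  // bias removal above they are negative and, treated as unsigned, compare above
  // `num_entries - 1`.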
7334 
7335   // We are in the range of the table.
7336   // Load the address of the jump table in the constant area.
7337   __ leaq(base_reg, codegen_->LiteralCaseTable(switch_instr));
7338 
7339   // Load the (signed) offset from the jump table.
7340   __ movsxd(temp_reg, Address(base_reg, value_reg, TIMES_4, 0));
7341 
7342   // Add the offset to the address of the table base.
7343   __ addq(temp_reg, base_reg);
7344 
7345   // And jump.
7346   __ jmp(temp_reg);
7347 }
7348 
7349 void LocationsBuilderX86_64::VisitIntermediateAddress(HIntermediateAddress* instruction
7350                                                       ATTRIBUTE_UNUSED) {
7351   LOG(FATAL) << "Unreachable";
7352 }
7353 
7354 void InstructionCodeGeneratorX86_64::VisitIntermediateAddress(HIntermediateAddress* instruction
7355                                                               ATTRIBUTE_UNUSED) {
7356   LOG(FATAL) << "Unreachable";
7357 }
7358 
7359 void CodeGeneratorX86_64::Load32BitValue(CpuRegister dest, int32_t value) {
7360   if (value == 0) {
7361     __ xorl(dest, dest);
7362   } else {
7363     __ movl(dest, Immediate(value));
7364   }
7365 }
7366 
7367 void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) {
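  // Encoding note (for reference): zero uses the short `xorl`; values that fit in
  // unsigned 32 bits use `movl`, relying on its implicit zero-extension to 64 bits;
  // anything else needs the full 64-bit `movq` immediate.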
7368   if (value == 0) {
7369     // Clears upper bits too.
7370     __ xorl(dest, dest);
7371   } else if (IsUint<32>(value)) {
7372     // We can use a 32 bit move, as it will zero-extend and is shorter.
7373     __ movl(dest, Immediate(static_cast<int32_t>(value)));
7374   } else {
7375     __ movq(dest, Immediate(value));
7376   }
7377 }
7378 
7379 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, int32_t value) {
7380   if (value == 0) {
7381     __ xorps(dest, dest);
7382   } else {
7383     __ movss(dest, LiteralInt32Address(value));
7384   }
7385 }
7386 
7387 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, int64_t value) {
7388   if (value == 0) {
7389     __ xorpd(dest, dest);
7390   } else {
7391     __ movsd(dest, LiteralInt64Address(value));
7392   }
7393 }
7394 
7395 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, float value) {
7396   Load32BitValue(dest, bit_cast<int32_t, float>(value));
7397 }
7398 
7399 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, double value) {
7400   Load64BitValue(dest, bit_cast<int64_t, double>(value));
7401 }
7402 
7403 void CodeGeneratorX86_64::Compare32BitValue(CpuRegister dest, int32_t value) {
7404   if (value == 0) {
7405     __ testl(dest, dest);
7406   } else {
7407     __ cmpl(dest, Immediate(value));
7408   }
7409 }
7410 
7411 void CodeGeneratorX86_64::Compare64BitValue(CpuRegister dest, int64_t value) {
7412   if (IsInt<32>(value)) {
7413     if (value == 0) {
7414       __ testq(dest, dest);
7415     } else {
7416       __ cmpq(dest, Immediate(static_cast<int32_t>(value)));
7417     }
7418   } else {
7419     // Value won't fit in a 32-bit immediate.
7420     __ cmpq(dest, LiteralInt64Address(value));
7421   }
7422 }
7423 
7424 void CodeGeneratorX86_64::GenerateIntCompare(Location lhs, Location rhs) {
7425   CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>();
7426   GenerateIntCompare(lhs_reg, rhs);
7427 }
7428 
7429 void CodeGeneratorX86_64::GenerateIntCompare(CpuRegister lhs, Location rhs) {
7430   if (rhs.IsConstant()) {
7431     int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
7432     Compare32BitValue(lhs, value);
7433   } else if (rhs.IsStackSlot()) {
7434     __ cmpl(lhs, Address(CpuRegister(RSP), rhs.GetStackIndex()));
7435   } else {
7436     __ cmpl(lhs, rhs.AsRegister<CpuRegister>());
7437   }
7438 }
7439 
7440 void CodeGeneratorX86_64::GenerateLongCompare(Location lhs, Location rhs) {
7441   CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>();
7442   if (rhs.IsConstant()) {
7443     int64_t value = rhs.GetConstant()->AsLongConstant()->GetValue();
7444     Compare64BitValue(lhs_reg, value);
7445   } else if (rhs.IsDoubleStackSlot()) {
7446     __ cmpq(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
7447   } else {
7448     __ cmpq(lhs_reg, rhs.AsRegister<CpuRegister>());
7449   }
7450 }
7451 
7452 Address CodeGeneratorX86_64::ArrayAddress(CpuRegister obj,
7453                                           Location index,
7454                                           ScaleFactor scale,
7455                                           uint32_t data_offset) {
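  // Worked example with hypothetical values: a constant index of 3 with TIMES_4 and a
  // data_offset of 12 yields Address(obj, 3 * 4 + 12), i.e. [obj + 24]; a register
  // index yields [obj + index * 4 + 12].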
7456   return index.IsConstant() ?
7457       Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) :
7458       Address(obj, index.AsRegister<CpuRegister>(), scale, data_offset);
7459 }
7460 
7461 void CodeGeneratorX86_64::Store64BitValueToStack(Location dest, int64_t value) {
7462   DCHECK(dest.IsDoubleStackSlot());
7463   if (IsInt<32>(value)) {
7464     // Can move directly as an int32 constant.
7465     __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()),
7466             Immediate(static_cast<int32_t>(value)));
7467   } else {
7468     Load64BitValue(CpuRegister(TMP), value);
7469     __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()), CpuRegister(TMP));
7470   }
7471 }
7472 
7473 /**
7474  * Class to handle late fixup of offsets into constant area.
7475  */
7476 class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
7477  public:
7478   RIPFixup(CodeGeneratorX86_64& codegen, size_t offset)
7479       : codegen_(&codegen), offset_into_constant_area_(offset) {}
7480 
7481  protected:
7482   void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }
7483 
7484   CodeGeneratorX86_64* codegen_;
7485 
7486  private:
7487   void Process(const MemoryRegion& region, int pos) override {
7488     // Patch the correct offset for the instruction.  We use the address of the
7489     // 'next' instruction, which is 'pos' (patch the 4 bytes before).
7490     int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
7491     int32_t relative_position = constant_offset - pos;
7492 
7493     // Patch in the right value.
7494     region.StoreUnaligned<int32_t>(pos - 4, relative_position);
7495   }
7496 
7497   // Location in constant area that the fixup refers to.
7498   size_t offset_into_constant_area_;
7499 };
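
// Informal example of the fixup above: for a RIP-relative operand such as
//   movss xmm0, [rip + <literal>]
// `pos` is recorded just past the 4-byte displacement, and Process() rewrites those
// bytes to (ConstantAreaStart() + offset_into_constant_area_) - pos, the distance
// from the end of the instruction to the literal in the constant area.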
7500 
7501 /**
7502  * Class to handle late fixup of offsets to a jump table that will be created in the
7503  * constant area.
7504  */
7505 class JumpTableRIPFixup : public RIPFixup {
7506  public:
7507   JumpTableRIPFixup(CodeGeneratorX86_64& codegen, HPackedSwitch* switch_instr)
7508       : RIPFixup(codegen, -1), switch_instr_(switch_instr) {}
7509 
7510   void CreateJumpTable() {
7511     X86_64Assembler* assembler = codegen_->GetAssembler();
7512 
7513     // Ensure that the reference to the jump table has the correct offset.
7514     const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
7515     SetOffset(offset_in_constant_table);
7516 
7517     // Compute the offset from the start of the function to this jump table.
7518     const int32_t current_table_offset = assembler->CodeSize() + offset_in_constant_table;
7519 
7520     // Populate the jump table with the offsets to the successor blocks.
7521     int32_t num_entries = switch_instr_->GetNumEntries();
7522     HBasicBlock* block = switch_instr_->GetBlock();
7523     const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
7524     // The value that we want is the target offset - the position of the table.
7525     for (int32_t i = 0; i < num_entries; i++) {
7526       HBasicBlock* b = successors[i];
7527       Label* l = codegen_->GetLabelOf(b);
7528       DCHECK(l->IsBound());
7529       int32_t offset_to_block = l->Position() - current_table_offset;
7530       assembler->AppendInt32(offset_to_block);
7531     }
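    // At this point the constant area ends with one int32 entry per successor, each
    // holding `label position - table start`; the dispatch sequence adds the table
    // base back before jumping, so the entries stay position independent.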
7532   }
7533 
7534  private:
7535   const HPackedSwitch* switch_instr_;
7536 };
7537 
7538 void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) {
7539   // Generate the constant area if needed.
7540   X86_64Assembler* assembler = GetAssembler();
7541   if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
7542     // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8 byte values.
7543     assembler->Align(4, 0);
7544     constant_area_start_ = assembler->CodeSize();
7545 
7546     // Populate any jump tables.
7547     for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) {
7548       jump_table->CreateJumpTable();
7549     }
7550 
7551     // And now add the constant area to the generated code.
7552     assembler->AddConstantArea();
7553   }
7554 
7555   // And finish up.
7556   CodeGenerator::Finalize(allocator);
7557 }
7558 
7559 Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) {
7560   AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddDouble(v));
7561   return Address::RIP(fixup);
7562 }
7563 
7564 Address CodeGeneratorX86_64::LiteralFloatAddress(float v) {
7565   AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddFloat(v));
7566   return Address::RIP(fixup);
7567 }
7568 
7569 Address CodeGeneratorX86_64::LiteralInt32Address(int32_t v) {
7570   AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddInt32(v));
7571   return Address::RIP(fixup);
7572 }
7573 
7574 Address CodeGeneratorX86_64::LiteralInt64Address(int64_t v) {
7575   AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddInt64(v));
7576   return Address::RIP(fixup);
7577 }
7578 
7579 // TODO: trg as memory.
7580 void CodeGeneratorX86_64::MoveFromReturnRegister(Location trg, DataType::Type type) {
7581   if (!trg.IsValid()) {
7582     DCHECK_EQ(type, DataType::Type::kVoid);
7583     return;
7584   }
7585 
7586   DCHECK_NE(type, DataType::Type::kVoid);
7587 
7588   Location return_loc = InvokeDexCallingConventionVisitorX86_64().GetReturnLocation(type);
7589   if (trg.Equals(return_loc)) {
7590     return;
7591   }
7592 
7593   // Let the parallel move resolver take care of all of this.
7594   HParallelMove parallel_move(GetGraph()->GetAllocator());
7595   parallel_move.AddMove(return_loc, trg, type, nullptr);
7596   GetMoveResolver()->EmitNativeCode(&parallel_move);
7597 }
7598 
7599 Address CodeGeneratorX86_64::LiteralCaseTable(HPackedSwitch* switch_instr) {
7600   // Create a fixup to be used to create and address the jump table.
7601   JumpTableRIPFixup* table_fixup =
7602       new (GetGraph()->GetAllocator()) JumpTableRIPFixup(*this, switch_instr);
7603 
7604   // We have to populate the jump tables.
7605   fixups_to_jump_tables_.push_back(table_fixup);
7606   return Address::RIP(table_fixup);
7607 }
7608 
7609 void CodeGeneratorX86_64::MoveInt64ToAddress(const Address& addr_low,
7610                                              const Address& addr_high,
7611                                              int64_t v,
7612                                              HInstruction* instruction) {
7613   if (IsInt<32>(v)) {
7614     int32_t v_32 = v;
7615     __ movq(addr_low, Immediate(v_32));
7616     MaybeRecordImplicitNullCheck(instruction);
7617   } else {
7618     // The value does not fit in a sign-extended 32-bit immediate.  Store it in two 32-bit pieces.
7619     int32_t low_v = Low32Bits(v);
7620     int32_t high_v = High32Bits(v);
7621     __ movl(addr_low, Immediate(low_v));
7622     MaybeRecordImplicitNullCheck(instruction);
7623     __ movl(addr_high, Immediate(high_v));
7624   }
7625 }
7626 
7627 void CodeGeneratorX86_64::PatchJitRootUse(uint8_t* code,
7628                                           const uint8_t* roots_data,
7629                                           const PatchInfo<Label>& info,
7630                                           uint64_t index_in_table) const {
7631   uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
7632   uintptr_t address =
7633       reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
7634   using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t;
7635   reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
7636      dchecked_integral_cast<uint32_t>(address);
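  // Note: the label in `info` is assumed to be bound just past the 32-bit literal
  // emitted for the JIT root load, hence the adjustment above; the literal is
  // overwritten with the address of the root's slot in `roots_data`.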
7637 }
7638 
7639 void CodeGeneratorX86_64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
7640   for (const PatchInfo<Label>& info : jit_string_patches_) {
7641     StringReference string_reference(info.target_dex_file, dex::StringIndex(info.offset_or_index));
7642     uint64_t index_in_table = GetJitStringRootIndex(string_reference);
7643     PatchJitRootUse(code, roots_data, info, index_in_table);
7644   }
7645 
7646   for (const PatchInfo<Label>& info : jit_class_patches_) {
7647     TypeReference type_reference(info.target_dex_file, dex::TypeIndex(info.offset_or_index));
7648     uint64_t index_in_table = GetJitClassRootIndex(type_reference);
7649     PatchJitRootUse(code, roots_data, info, index_in_table);
7650   }
7651 }
7652 
7653 #undef __
7654 
7655 }  // namespace x86_64
7656 }  // namespace art
7657