1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "code_generator_x86_64.h"
18 
19 #include "art_method.h"
20 #include "class_table.h"
21 #include "code_generator_utils.h"
22 #include "compiled_method.h"
23 #include "entrypoints/quick/quick_entrypoints.h"
24 #include "gc/accounting/card_table.h"
25 #include "heap_poisoning.h"
26 #include "intrinsics.h"
27 #include "intrinsics_x86_64.h"
28 #include "linker/linker_patch.h"
29 #include "lock_word.h"
30 #include "mirror/array-inl.h"
31 #include "mirror/class-inl.h"
32 #include "mirror/object_reference.h"
33 #include "thread.h"
34 #include "utils/assembler.h"
35 #include "utils/stack_checks.h"
36 #include "utils/x86_64/assembler_x86_64.h"
37 #include "utils/x86_64/managed_register_x86_64.h"
38 
39 namespace art {
40 
41 template<class MirrorType>
42 class GcRoot;
43 
44 namespace x86_64 {
45 
46 static constexpr int kCurrentMethodStackOffset = 0;
47 static constexpr Register kMethodRegisterArgument = RDI;
48 // The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump
49 // table version generates 7 instructions and num_entries literals. The compare/jump sequence
50 // generates less code/data with a small num_entries.
51 static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;
52 
53 static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 };
54 static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 };
55 
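// Presumably the C2 flag (bit 10) of the x87 FPU status word, checked after fprem when
// generating float/double remainders further down in this file.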
56 static constexpr int kC2ConditionMask = 0x400;
57 
58 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
59 #define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())->  // NOLINT
60 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize, x).Int32Value()
61 
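// Slow path calling the runtime entrypoint that throws a NullPointerException.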
62 class NullCheckSlowPathX86_64 : public SlowPathCode {
63  public:
64   explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : SlowPathCode(instruction) {}
65 
66   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
67     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
68     __ Bind(GetEntryLabel());
69     if (instruction_->CanThrowIntoCatchBlock()) {
70       // Live registers will be restored in the catch block if caught.
71       SaveLiveRegisters(codegen, instruction_->GetLocations());
72     }
73     x86_64_codegen->InvokeRuntime(kQuickThrowNullPointer,
74                                   instruction_,
75                                   instruction_->GetDexPc(),
76                                   this);
77     CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
78   }
79 
80   bool IsFatal() const OVERRIDE { return true; }
81 
82   const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathX86_64"; }
83 
84  private:
85   DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86_64);
86 };
87 
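// Slow path calling the runtime entrypoint that throws an ArithmeticException on division by zero.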
88 class DivZeroCheckSlowPathX86_64 : public SlowPathCode {
89  public:
90   explicit DivZeroCheckSlowPathX86_64(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}
91 
92   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
93     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
94     __ Bind(GetEntryLabel());
95     x86_64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
96     CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
97   }
98 
99   bool IsFatal() const OVERRIDE { return true; }
100 
101   const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathX86_64"; }
102 
103  private:
104   DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86_64);
105 };
106 
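// Slow path for integer div/rem when the divisor is -1: dividing the minimum value by -1
// would overflow the idiv instruction, so the result is computed directly instead
// (negation for div, zero for rem).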
107 class DivRemMinusOneSlowPathX86_64 : public SlowPathCode {
108  public:
109   DivRemMinusOneSlowPathX86_64(HInstruction* at, Register reg, DataType::Type type, bool is_div)
110       : SlowPathCode(at), cpu_reg_(CpuRegister(reg)), type_(type), is_div_(is_div) {}
111 
112   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
113     __ Bind(GetEntryLabel());
114     if (type_ == DataType::Type::kInt32) {
115       if (is_div_) {
116         __ negl(cpu_reg_);
117       } else {
118         __ xorl(cpu_reg_, cpu_reg_);
119       }
120 
121     } else {
122       DCHECK_EQ(DataType::Type::kInt64, type_);
123       if (is_div_) {
124         __ negq(cpu_reg_);
125       } else {
126         __ xorl(cpu_reg_, cpu_reg_);
127       }
128     }
129     __ jmp(GetExitLabel());
130   }
131 
132   const char* GetDescription() const OVERRIDE { return "DivRemMinusOneSlowPathX86_64"; }
133 
134  private:
135   const CpuRegister cpu_reg_;
136   const DataType::Type type_;
137   const bool is_div_;
138   DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86_64);
139 };
140 
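// Slow path calling the TestSuspend runtime entrypoint for a suspend check, then jumping
// back either to the return label or to the given successor block.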
141 class SuspendCheckSlowPathX86_64 : public SlowPathCode {
142  public:
143   SuspendCheckSlowPathX86_64(HSuspendCheck* instruction, HBasicBlock* successor)
144       : SlowPathCode(instruction), successor_(successor) {}
145 
146   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
147     LocationSummary* locations = instruction_->GetLocations();
148     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
149     __ Bind(GetEntryLabel());
150     SaveLiveRegisters(codegen, locations);  // Only saves full width XMM for SIMD.
151     x86_64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
152     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
153     RestoreLiveRegisters(codegen, locations);  // Only restores full width XMM for SIMD.
154     if (successor_ == nullptr) {
155       __ jmp(GetReturnLabel());
156     } else {
157       __ jmp(x86_64_codegen->GetLabelOf(successor_));
158     }
159   }
160 
161   Label* GetReturnLabel() {
162     DCHECK(successor_ == nullptr);
163     return &return_label_;
164   }
165 
166   HBasicBlock* GetSuccessor() const {
167     return successor_;
168   }
169 
170   const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathX86_64"; }
171 
172  private:
173   HBasicBlock* const successor_;
174   Label return_label_;
175 
176   DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86_64);
177 };
178 
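// Slow path throwing an out-of-bounds exception with the index and length as arguments
// (kQuickThrowStringBounds for String.charAt, kQuickThrowArrayBounds otherwise).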
179 class BoundsCheckSlowPathX86_64 : public SlowPathCode {
180  public:
181   explicit BoundsCheckSlowPathX86_64(HBoundsCheck* instruction)
182     : SlowPathCode(instruction) {}
183 
184   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
185     LocationSummary* locations = instruction_->GetLocations();
186     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
187     __ Bind(GetEntryLabel());
188     if (instruction_->CanThrowIntoCatchBlock()) {
189       // Live registers will be restored in the catch block if caught.
190       SaveLiveRegisters(codegen, instruction_->GetLocations());
191     }
192     // Are we using an array length from memory?
193     HInstruction* array_length = instruction_->InputAt(1);
194     Location length_loc = locations->InAt(1);
195     InvokeRuntimeCallingConvention calling_convention;
196     if (array_length->IsArrayLength() && array_length->IsEmittedAtUseSite()) {
197       // Load the array length into our temporary.
198       HArrayLength* length = array_length->AsArrayLength();
199       uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(length);
200       Location array_loc = array_length->GetLocations()->InAt(0);
201       Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
202       length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
203       // Check for conflicts with index.
204       if (length_loc.Equals(locations->InAt(0))) {
205         // We know we aren't using parameter 2.
206         length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2));
207       }
208       __ movl(length_loc.AsRegister<CpuRegister>(), array_len);
209       if (mirror::kUseStringCompression && length->IsStringLength()) {
210         __ shrl(length_loc.AsRegister<CpuRegister>(), Immediate(1));
211       }
212     }
213 
214     // We're moving two locations to locations that could overlap, so we need a parallel
215     // move resolver.
216     codegen->EmitParallelMoves(
217         locations->InAt(0),
218         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
219         DataType::Type::kInt32,
220         length_loc,
221         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
222         DataType::Type::kInt32);
223     QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
224         ? kQuickThrowStringBounds
225         : kQuickThrowArrayBounds;
226     x86_64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
227     CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
228     CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
229   }
230 
231   bool IsFatal() const OVERRIDE { return true; }
232 
233   const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathX86_64"; }
234 
235  private:
236   DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86_64);
237 };
238 
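// Slow path resolving a class via the runtime and, if requested, initializing its static storage.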
239 class LoadClassSlowPathX86_64 : public SlowPathCode {
240  public:
241   LoadClassSlowPathX86_64(HLoadClass* cls,
242                           HInstruction* at,
243                           uint32_t dex_pc,
244                           bool do_clinit)
245       : SlowPathCode(at), cls_(cls), dex_pc_(dex_pc), do_clinit_(do_clinit) {
246     DCHECK(at->IsLoadClass() || at->IsClinitCheck());
247   }
248 
249   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
250     LocationSummary* locations = instruction_->GetLocations();
251     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
252     __ Bind(GetEntryLabel());
253 
254     SaveLiveRegisters(codegen, locations);
255 
256     // Custom calling convention: RAX serves as both input and output.
257     __ movl(CpuRegister(RAX), Immediate(cls_->GetTypeIndex().index_));
258     x86_64_codegen->InvokeRuntime(do_clinit_ ? kQuickInitializeStaticStorage : kQuickInitializeType,
259                                   instruction_,
260                                   dex_pc_,
261                                   this);
262     if (do_clinit_) {
263       CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
264     } else {
265       CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
266     }
267 
268     Location out = locations->Out();
269     // Move the class to the desired location.
270     if (out.IsValid()) {
271       DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
272       x86_64_codegen->Move(out, Location::RegisterLocation(RAX));
273     }
274 
275     RestoreLiveRegisters(codegen, locations);
276     __ jmp(GetExitLabel());
277   }
278 
279   const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathX86_64"; }
280 
281  private:
282   // The class this slow path will load.
283   HLoadClass* const cls_;
284 
285   // The dex PC of `at_`.
286   const uint32_t dex_pc_;
287 
288   // Whether to initialize the class.
289   const bool do_clinit_;
290 
291   DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86_64);
292 };
293 
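// Slow path resolving a string via the kQuickResolveString runtime entrypoint.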
294 class LoadStringSlowPathX86_64 : public SlowPathCode {
295  public:
296   explicit LoadStringSlowPathX86_64(HLoadString* instruction) : SlowPathCode(instruction) {}
297 
298   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
299     LocationSummary* locations = instruction_->GetLocations();
300     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
301 
302     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
303     __ Bind(GetEntryLabel());
304     SaveLiveRegisters(codegen, locations);
305 
306     const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
307     // Custom calling convention: RAX serves as both input and output.
308     __ movl(CpuRegister(RAX), Immediate(string_index.index_));
309     x86_64_codegen->InvokeRuntime(kQuickResolveString,
310                                   instruction_,
311                                   instruction_->GetDexPc(),
312                                   this);
313     CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
314     x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
315     RestoreLiveRegisters(codegen, locations);
316 
317     __ jmp(GetExitLabel());
318   }
319 
320   const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathX86_64"; }
321 
322  private:
323   DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86_64);
324 };
325 
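// Slow path for instanceof/check-cast, calling into the runtime for the non-trivial cases.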
326 class TypeCheckSlowPathX86_64 : public SlowPathCode {
327  public:
328   TypeCheckSlowPathX86_64(HInstruction* instruction, bool is_fatal)
329       : SlowPathCode(instruction), is_fatal_(is_fatal) {}
330 
331   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
332     LocationSummary* locations = instruction_->GetLocations();
333     uint32_t dex_pc = instruction_->GetDexPc();
334     DCHECK(instruction_->IsCheckCast()
335            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
336 
337     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
338     __ Bind(GetEntryLabel());
339 
340     if (kPoisonHeapReferences &&
341         instruction_->IsCheckCast() &&
342         instruction_->AsCheckCast()->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) {
343       // First, unpoison the `cls` reference that was poisoned for direct memory comparison.
344       __ UnpoisonHeapReference(locations->InAt(1).AsRegister<CpuRegister>());
345     }
346 
347     if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
348       SaveLiveRegisters(codegen, locations);
349     }
350 
351     // We're moving two locations to locations that could overlap, so we need a parallel
352     // move resolver.
353     InvokeRuntimeCallingConvention calling_convention;
354     codegen->EmitParallelMoves(locations->InAt(0),
355                                Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
356                                DataType::Type::kReference,
357                                locations->InAt(1),
358                                Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
359                                DataType::Type::kReference);
360     if (instruction_->IsInstanceOf()) {
361       x86_64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
362       CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
363     } else {
364       DCHECK(instruction_->IsCheckCast());
365       x86_64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
366       CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
367     }
368 
369     if (!is_fatal_) {
370       if (instruction_->IsInstanceOf()) {
371         x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
372       }
373 
374       RestoreLiveRegisters(codegen, locations);
375       __ jmp(GetExitLabel());
376     }
377   }
378 
379   const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathX86_64"; }
380 
381   bool IsFatal() const OVERRIDE { return is_fatal_; }
382 
383  private:
384   const bool is_fatal_;
385 
386   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86_64);
387 };
388 
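// Slow path triggering deoptimization, passing the deoptimization kind to the runtime.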
389 class DeoptimizationSlowPathX86_64 : public SlowPathCode {
390  public:
391   explicit DeoptimizationSlowPathX86_64(HDeoptimize* instruction)
392       : SlowPathCode(instruction) {}
393 
394   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
395     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
396     __ Bind(GetEntryLabel());
397     LocationSummary* locations = instruction_->GetLocations();
398     SaveLiveRegisters(codegen, locations);
399     InvokeRuntimeCallingConvention calling_convention;
400     x86_64_codegen->Load32BitValue(
401         CpuRegister(calling_convention.GetRegisterAt(0)),
402         static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
403     x86_64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
404     CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
405   }
406 
407   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86_64"; }
408 
409  private:
410   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64);
411 };
412 
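// Slow path for aput-object, calling the kQuickAputObject runtime entrypoint to perform the store.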
413 class ArraySetSlowPathX86_64 : public SlowPathCode {
414  public:
415   explicit ArraySetSlowPathX86_64(HInstruction* instruction) : SlowPathCode(instruction) {}
416 
417   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
418     LocationSummary* locations = instruction_->GetLocations();
419     __ Bind(GetEntryLabel());
420     SaveLiveRegisters(codegen, locations);
421 
422     InvokeRuntimeCallingConvention calling_convention;
423     HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
424     parallel_move.AddMove(
425         locations->InAt(0),
426         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
427         DataType::Type::kReference,
428         nullptr);
429     parallel_move.AddMove(
430         locations->InAt(1),
431         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
432         DataType::Type::kInt32,
433         nullptr);
434     parallel_move.AddMove(
435         locations->InAt(2),
436         Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
437         DataType::Type::kReference,
438         nullptr);
439     codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
440 
441     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
442     x86_64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
443     CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
444     RestoreLiveRegisters(codegen, locations);
445     __ jmp(GetExitLabel());
446   }
447 
448   const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathX86_64"; }
449 
450  private:
451   DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64);
452 };
453 
454 // Slow path marking an object reference `ref` during a read
455 // barrier. The field `obj.field` in the object `obj` holding this
456 // reference does not get updated by this slow path after marking (see
457 // ReadBarrierMarkAndUpdateFieldSlowPathX86_64 below for that).
458 //
459 // This means that after the execution of this slow path, `ref` will
460 // always be up-to-date, but `obj.field` may not; i.e., after the
461 // flip, `ref` will be a to-space reference, but `obj.field` will
462 // probably still be a from-space reference (unless it gets updated by
463 // another thread, or if another thread installed another object
464 // reference (different from `ref`) in `obj.field`).
465 class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
466  public:
467   ReadBarrierMarkSlowPathX86_64(HInstruction* instruction,
468                                 Location ref,
469                                 bool unpoison_ref_before_marking)
470       : SlowPathCode(instruction),
471         ref_(ref),
472         unpoison_ref_before_marking_(unpoison_ref_before_marking) {
473     DCHECK(kEmitCompilerReadBarrier);
474   }
475 
476   const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathX86_64"; }
477 
478   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
479     LocationSummary* locations = instruction_->GetLocations();
480     CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
481     Register ref_reg = ref_cpu_reg.AsRegister();
482     DCHECK(locations->CanCall());
483     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
484     DCHECK(instruction_->IsInstanceFieldGet() ||
485            instruction_->IsStaticFieldGet() ||
486            instruction_->IsArrayGet() ||
487            instruction_->IsArraySet() ||
488            instruction_->IsLoadClass() ||
489            instruction_->IsLoadString() ||
490            instruction_->IsInstanceOf() ||
491            instruction_->IsCheckCast() ||
492            (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
493            (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
494         << "Unexpected instruction in read barrier marking slow path: "
495         << instruction_->DebugName();
496 
497     __ Bind(GetEntryLabel());
498     if (unpoison_ref_before_marking_) {
499       // Object* ref = ref_addr->AsMirrorPtr()
500       __ MaybeUnpoisonHeapReference(ref_cpu_reg);
501     }
502     // No need to save live registers; it's taken care of by the
503     // entrypoint. Also, there is no need to update the stack mask,
504     // as this runtime call will not trigger a garbage collection.
505     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
506     DCHECK_NE(ref_reg, RSP);
507     DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
508     // "Compact" slow path, saving two moves.
509     //
510     // Instead of using the standard runtime calling convention (input
511     // and output in R0):
512     //
513     //   RDI <- ref
514     //   RAX <- ReadBarrierMark(RDI)
515     //   ref <- RAX
516     //
517     // we just use rX (the register containing `ref`) as input and output
518     // of a dedicated entrypoint:
519     //
520     //   rX <- ReadBarrierMarkRegX(rX)
521     //
522     int32_t entry_point_offset =
523         Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
524     // This runtime call does not require a stack map.
525     x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
526     __ jmp(GetExitLabel());
527   }
528 
529  private:
530   // The location (register) of the marked object reference.
531   const Location ref_;
532   // Should the reference in `ref_` be unpoisoned prior to marking it?
533   const bool unpoison_ref_before_marking_;
534 
535   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64);
536 };
537 
538 // Slow path marking an object reference `ref` during a read barrier,
539 // and if needed, atomically updating the field `obj.field` in the
540 // object `obj` holding this reference after marking (contrary to
541 // ReadBarrierMarkSlowPathX86_64 above, which never tries to update
542 // `obj.field`).
543 //
544 // This means that after the execution of this slow path, both `ref`
545 // and `obj.field` will be up-to-date; i.e., after the flip, both will
546 // hold the same to-space reference (unless another thread installed
547 // another object reference (different from `ref`) in `obj.field`).
548 class ReadBarrierMarkAndUpdateFieldSlowPathX86_64 : public SlowPathCode {
549  public:
550   ReadBarrierMarkAndUpdateFieldSlowPathX86_64(HInstruction* instruction,
551                                               Location ref,
552                                               CpuRegister obj,
553                                               const Address& field_addr,
554                                               bool unpoison_ref_before_marking,
555                                               CpuRegister temp1,
556                                               CpuRegister temp2)
557       : SlowPathCode(instruction),
558         ref_(ref),
559         obj_(obj),
560         field_addr_(field_addr),
561         unpoison_ref_before_marking_(unpoison_ref_before_marking),
562         temp1_(temp1),
563         temp2_(temp2) {
564     DCHECK(kEmitCompilerReadBarrier);
565   }
566 
567   const char* GetDescription() const OVERRIDE {
568     return "ReadBarrierMarkAndUpdateFieldSlowPathX86_64";
569   }
570 
571   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
572     LocationSummary* locations = instruction_->GetLocations();
573     CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
574     Register ref_reg = ref_cpu_reg.AsRegister();
575     DCHECK(locations->CanCall());
576     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
577     // This slow path is only used by the UnsafeCASObject intrinsic.
578     DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
579         << "Unexpected instruction in read barrier marking and field updating slow path: "
580         << instruction_->DebugName();
581     DCHECK(instruction_->GetLocations()->Intrinsified());
582     DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
583 
584     __ Bind(GetEntryLabel());
585     if (unpoison_ref_before_marking_) {
586       // Object* ref = ref_addr->AsMirrorPtr()
587       __ MaybeUnpoisonHeapReference(ref_cpu_reg);
588     }
589 
590     // Save the old (unpoisoned) reference.
591     __ movl(temp1_, ref_cpu_reg);
592 
593     // No need to save live registers; it's taken care of by the
594     // entrypoint. Also, there is no need to update the stack mask,
595     // as this runtime call will not trigger a garbage collection.
596     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
597     DCHECK_NE(ref_reg, RSP);
598     DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
599     // "Compact" slow path, saving two moves.
600     //
601     // Instead of using the standard runtime calling convention (input
602     // and output in R0):
603     //
604     //   RDI <- ref
605     //   RAX <- ReadBarrierMark(RDI)
606     //   ref <- RAX
607     //
608     // we just use rX (the register containing `ref`) as input and output
609     // of a dedicated entrypoint:
610     //
611     //   rX <- ReadBarrierMarkRegX(rX)
612     //
613     int32_t entry_point_offset =
614         Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
615     // This runtime call does not require a stack map.
616     x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
617 
618     // If the new reference is different from the old reference,
619     // update the field in the holder (`*field_addr`).
620     //
621     // Note that this field could also hold a different object, if
622     // another thread had concurrently changed it. In that case, the
623     // LOCK CMPXCHGL instruction in the compare-and-set (CAS)
624     // operation below would abort the CAS, leaving the field as-is.
625     NearLabel done;
626     __ cmpl(temp1_, ref_cpu_reg);
627     __ j(kEqual, &done);
628 
629     // Update the holder's field atomically.  This may fail if the
630     // mutator updates it before us, but it's OK.  This is achieved
631     // using a strong compare-and-set (CAS) operation with relaxed
632     // memory synchronization ordering, where the expected value is
633     // the old reference and the desired value is the new reference.
634     // This operation is implemented with a 32-bit LOCK CMPXCHG
635     // instruction, which requires the expected value (the old
636     // reference) to be in EAX.  Save RAX beforehand, and move the
637     // expected value (stored in `temp1_`) into EAX.
638     __ movq(temp2_, CpuRegister(RAX));
639     __ movl(CpuRegister(RAX), temp1_);
640 
641     // Convenience aliases.
642     CpuRegister base = obj_;
643     CpuRegister expected = CpuRegister(RAX);
644     CpuRegister value = ref_cpu_reg;
645 
646     bool base_equals_value = (base.AsRegister() == value.AsRegister());
647     Register value_reg = ref_reg;
648     if (kPoisonHeapReferences) {
649       if (base_equals_value) {
650         // If `base` and `value` are the same register location, move
651         // `value_reg` to a temporary register.  This way, poisoning
652         // `value_reg` won't invalidate `base`.
653         value_reg = temp1_.AsRegister();
654         __ movl(CpuRegister(value_reg), base);
655       }
656 
657       // Check that the register allocator did not assign the location
658       // of `expected` (RAX) to `value` nor to `base`, so that heap
659       // poisoning (when enabled) works as intended below.
660       // - If `value` were equal to `expected`, both references would
661       //   be poisoned twice, meaning they would not be poisoned at
662       //   all, as heap poisoning uses address negation.
663       // - If `base` were equal to `expected`, poisoning `expected`
664       //   would invalidate `base`.
665       DCHECK_NE(value_reg, expected.AsRegister());
666       DCHECK_NE(base.AsRegister(), expected.AsRegister());
667 
668       __ PoisonHeapReference(expected);
669       __ PoisonHeapReference(CpuRegister(value_reg));
670     }
671 
672     __ LockCmpxchgl(field_addr_, CpuRegister(value_reg));
673 
674     // If heap poisoning is enabled, we need to unpoison the values
675     // that were poisoned earlier.
676     if (kPoisonHeapReferences) {
677       if (base_equals_value) {
678         // `value_reg` has been moved to a temporary register, no need
679         // to unpoison it.
680       } else {
681         __ UnpoisonHeapReference(CpuRegister(value_reg));
682       }
683       // No need to unpoison `expected` (RAX), as it is overwritten below.
684     }
685 
686     // Restore RAX.
687     __ movq(CpuRegister(RAX), temp2_);
688 
689     __ Bind(&done);
690     __ jmp(GetExitLabel());
691   }
692 
693  private:
694   // The location (register) of the marked object reference.
695   const Location ref_;
696   // The register containing the object holding the marked object reference field.
697   const CpuRegister obj_;
698   // The address of the marked reference field.  The base of this address must be `obj_`.
699   const Address field_addr_;
700 
701   // Should the reference in `ref_` be unpoisoned prior to marking it?
702   const bool unpoison_ref_before_marking_;
703 
704   const CpuRegister temp1_;
705   const CpuRegister temp2_;
706 
707   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86_64);
708 };
709 
710 // Slow path generating a read barrier for a heap reference.
711 class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
712  public:
713   ReadBarrierForHeapReferenceSlowPathX86_64(HInstruction* instruction,
714                                             Location out,
715                                             Location ref,
716                                             Location obj,
717                                             uint32_t offset,
718                                             Location index)
719       : SlowPathCode(instruction),
720         out_(out),
721         ref_(ref),
722         obj_(obj),
723         offset_(offset),
724         index_(index) {
725     DCHECK(kEmitCompilerReadBarrier);
726     // If `obj` is equal to `out` or `ref`, it means the initial
727     // object has been overwritten by (or after) the heap object
728     // reference load to be instrumented, e.g.:
729     //
730     //   __ movl(out, Address(out, offset));
731     //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
732     //
733     // In that case, we have lost the information about the original
734     // object, and the emitted read barrier cannot work properly.
735     DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
736     DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
737   }
738 
739   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
740     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
741     LocationSummary* locations = instruction_->GetLocations();
742     CpuRegister reg_out = out_.AsRegister<CpuRegister>();
743     DCHECK(locations->CanCall());
744     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_;
745     DCHECK(instruction_->IsInstanceFieldGet() ||
746            instruction_->IsStaticFieldGet() ||
747            instruction_->IsArrayGet() ||
748            instruction_->IsInstanceOf() ||
749            instruction_->IsCheckCast() ||
750            (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
751         << "Unexpected instruction in read barrier for heap reference slow path: "
752         << instruction_->DebugName();
753 
754     __ Bind(GetEntryLabel());
755     SaveLiveRegisters(codegen, locations);
756 
757     // We may have to change the index's value, but as `index_` is a
758     // constant member (like other "inputs" of this slow path),
759     // introduce a copy of it, `index`.
760     Location index = index_;
761     if (index_.IsValid()) {
762       // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
763       if (instruction_->IsArrayGet()) {
764         // Compute real offset and store it in index_.
765         Register index_reg = index_.AsRegister<CpuRegister>().AsRegister();
766         DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
767         if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
768           // We are about to change the value of `index_reg` (see the
769           // calls to art::x86_64::X86_64Assembler::shll and
770           // art::x86_64::X86_64Assembler::AddImmediate below), but it
771           // has not been saved by the previous call to
772           // art::SlowPathCode::SaveLiveRegisters, as it is a
773           // callee-save register --
774           // art::SlowPathCode::SaveLiveRegisters does not consider
775           // callee-save registers, as it has been designed with the
776           // assumption that callee-save registers are supposed to be
777           // handled by the called function.  So, as a callee-save
778           // register, `index_reg` _would_ eventually be saved onto
779           // the stack, but it would be too late: we would have
780           // changed its value earlier.  Therefore, we manually save
781           // it here into another freely available register,
782           // `free_reg`, chosen of course among the caller-save
783           // registers (as a callee-save `free_reg` register would
784           // exhibit the same problem).
785           //
786           // Note we could have requested a temporary register from
787           // the register allocator instead; but we prefer not to, as
788           // this is a slow path, and we know we can find a
789           // caller-save register that is available.
790           Register free_reg = FindAvailableCallerSaveRegister(codegen).AsRegister();
791           __ movl(CpuRegister(free_reg), CpuRegister(index_reg));
792           index_reg = free_reg;
793           index = Location::RegisterLocation(index_reg);
794         } else {
795           // The initial register stored in `index_` has already been
796           // saved in the call to art::SlowPathCode::SaveLiveRegisters
797           // (as it is not a callee-save register), so we can freely
798           // use it.
799         }
800         // Shifting the index value contained in `index_reg` by the
801         // scale factor (2) cannot overflow in practice, as the
802         // runtime is unable to allocate object arrays with a size
803         // larger than 2^26 - 1 (that is, 2^28 - 4 bytes).
804         __ shll(CpuRegister(index_reg), Immediate(TIMES_4));
805         static_assert(
806             sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
807             "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
808         __ AddImmediate(CpuRegister(index_reg), Immediate(offset_));
809       } else {
810         // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
811         // intrinsics, `index_` is not shifted by a scale factor of 2
812         // (as in the case of ArrayGet), as it is actually an offset
813         // to an object field within an object.
814         DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
815         DCHECK(instruction_->GetLocations()->Intrinsified());
816         DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
817                (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
818             << instruction_->AsInvoke()->GetIntrinsic();
819         DCHECK_EQ(offset_, 0U);
820         DCHECK(index_.IsRegister());
821       }
822     }
823 
824     // We're moving two or three locations to locations that could
825     // overlap, so we need a parallel move resolver.
826     InvokeRuntimeCallingConvention calling_convention;
827     HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
828     parallel_move.AddMove(ref_,
829                           Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
830                           DataType::Type::kReference,
831                           nullptr);
832     parallel_move.AddMove(obj_,
833                           Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
834                           DataType::Type::kReference,
835                           nullptr);
836     if (index.IsValid()) {
837       parallel_move.AddMove(index,
838                             Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
839                             DataType::Type::kInt32,
840                             nullptr);
841       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
842     } else {
843       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
844       __ movl(CpuRegister(calling_convention.GetRegisterAt(2)), Immediate(offset_));
845     }
846     x86_64_codegen->InvokeRuntime(kQuickReadBarrierSlow,
847                                   instruction_,
848                                   instruction_->GetDexPc(),
849                                   this);
850     CheckEntrypointTypes<
851         kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
852     x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
853 
854     RestoreLiveRegisters(codegen, locations);
855     __ jmp(GetExitLabel());
856   }
857 
858   const char* GetDescription() const OVERRIDE {
859     return "ReadBarrierForHeapReferenceSlowPathX86_64";
860   }
861 
862  private:
863   CpuRegister FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
864     size_t ref = static_cast<int>(ref_.AsRegister<CpuRegister>().AsRegister());
865     size_t obj = static_cast<int>(obj_.AsRegister<CpuRegister>().AsRegister());
866     for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
867       if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
868         return static_cast<CpuRegister>(i);
869       }
870     }
871     // We shall never fail to find a free caller-save register, as
872     // there are more than two core caller-save registers on x86-64
873     // (meaning it is possible to find one which is different from
874     // `ref` and `obj`).
875     DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
876     LOG(FATAL) << "Could not find a free caller-save register";
877     UNREACHABLE();
878   }
879 
880   const Location out_;
881   const Location ref_;
882   const Location obj_;
883   const uint32_t offset_;
884   // An additional location containing an index to an array.
885   // Only used for HArrayGet and the UnsafeGetObject &
886   // UnsafeGetObjectVolatile intrinsics.
887   const Location index_;
888 
889   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86_64);
890 };
891 
892 // Slow path generating a read barrier for a GC root.
893 class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode {
894  public:
895   ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root)
896       : SlowPathCode(instruction), out_(out), root_(root) {
897     DCHECK(kEmitCompilerReadBarrier);
898   }
899 
900   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
901     LocationSummary* locations = instruction_->GetLocations();
902     DCHECK(locations->CanCall());
903     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
904     DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
905         << "Unexpected instruction in read barrier for GC root slow path: "
906         << instruction_->DebugName();
907 
908     __ Bind(GetEntryLabel());
909     SaveLiveRegisters(codegen, locations);
910 
911     InvokeRuntimeCallingConvention calling_convention;
912     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
913     x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
914     x86_64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
915                                   instruction_,
916                                   instruction_->GetDexPc(),
917                                   this);
918     CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
919     x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
920 
921     RestoreLiveRegisters(codegen, locations);
922     __ jmp(GetExitLabel());
923   }
924 
925   const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathX86_64"; }
926 
927  private:
928   const Location out_;
929   const Location root_;
930 
931   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86_64);
932 };
933 
934 #undef __
935 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
936 #define __ down_cast<X86_64Assembler*>(GetAssembler())->  // NOLINT
937 
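// Maps an HIR integer condition to the corresponding x86-64 condition code.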
938 inline Condition X86_64IntegerCondition(IfCondition cond) {
939   switch (cond) {
940     case kCondEQ: return kEqual;
941     case kCondNE: return kNotEqual;
942     case kCondLT: return kLess;
943     case kCondLE: return kLessEqual;
944     case kCondGT: return kGreater;
945     case kCondGE: return kGreaterEqual;
946     case kCondB:  return kBelow;
947     case kCondBE: return kBelowEqual;
948     case kCondA:  return kAbove;
949     case kCondAE: return kAboveEqual;
950   }
951   LOG(FATAL) << "Unreachable";
952   UNREACHABLE();
953 }
954 
955 // Maps FP condition to x86_64 name.
956 inline Condition X86_64FPCondition(IfCondition cond) {
957   switch (cond) {
958     case kCondEQ: return kEqual;
959     case kCondNE: return kNotEqual;
960     case kCondLT: return kBelow;
961     case kCondLE: return kBelowEqual;
962     case kCondGT: return kAbove;
963     case kCondGE: return kAboveEqual;
964     default:      break;  // should not happen
965   }
966   LOG(FATAL) << "Unreachable";
967   UNREACHABLE();
968 }
969 
970 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStaticOrDirectDispatch(
971       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
972       HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) {
973   return desired_dispatch_info;
974 }
975 
976 void CodeGeneratorX86_64::GenerateStaticOrDirectCall(
977     HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
978   // All registers are assumed to be correctly set up.
979 
980   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
981   switch (invoke->GetMethodLoadKind()) {
982     case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
983       // temp = thread->string_init_entrypoint
984       uint32_t offset =
985           GetThreadOffset<kX86_64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
986       __ gs()->movq(temp.AsRegister<CpuRegister>(), Address::Absolute(offset, /* no_rip */ true));
987       break;
988     }
989     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
990       callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
991       break;
992     case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative:
993       DCHECK(GetCompilerOptions().IsBootImage());
994       __ leal(temp.AsRegister<CpuRegister>(),
995               Address::Absolute(kDummy32BitOffset, /* no_rip */ false));
996       RecordBootImageMethodPatch(invoke);
997       break;
998     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
999       Load64BitValue(temp.AsRegister<CpuRegister>(), invoke->GetMethodAddress());
1000       break;
1001     case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
1002       __ movq(temp.AsRegister<CpuRegister>(),
1003               Address::Absolute(kDummy32BitOffset, /* no_rip */ false));
1004       RecordMethodBssEntryPatch(invoke);
1005       break;
1006     }
1007     case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: {
1008       GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
1009       return;  // No code pointer retrieval; the runtime performs the call directly.
1010     }
1011   }
1012 
1013   switch (invoke->GetCodePtrLocation()) {
1014     case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
1015       __ call(&frame_entry_label_);
1016       break;
1017     case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
1018       // (callee_method + offset_of_quick_compiled_code)()
1019       __ call(Address(callee_method.AsRegister<CpuRegister>(),
1020                       ArtMethod::EntryPointFromQuickCompiledCodeOffset(
1021                           kX86_64PointerSize).SizeValue()));
1022       break;
1023   }
1024   RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
1025 
1026   DCHECK(!IsLeafMethod());
1027 }
1028 
1029 void CodeGeneratorX86_64::GenerateVirtualCall(
1030     HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
1031   CpuRegister temp = temp_in.AsRegister<CpuRegister>();
1032   size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
1033       invoke->GetVTableIndex(), kX86_64PointerSize).SizeValue();
1034 
1035   // Use the calling convention instead of the location of the receiver, as
1036   // intrinsics may have put the receiver in a different register. In the intrinsics
1037   // slow path, the arguments have been moved to the right place, so here we are
1038   // guaranteed that the receiver is the first register of the calling convention.
1039   InvokeDexCallingConvention calling_convention;
1040   Register receiver = calling_convention.GetRegisterAt(0);
1041 
1042   size_t class_offset = mirror::Object::ClassOffset().SizeValue();
1043   // /* HeapReference<Class> */ temp = receiver->klass_
1044   __ movl(temp, Address(CpuRegister(receiver), class_offset));
1045   MaybeRecordImplicitNullCheck(invoke);
1046   // Instead of simply (possibly) unpoisoning `temp` here, we should
1047   // emit a read barrier for the previous class reference load.
1048   // However this is not required in practice, as this is an
1049   // intermediate/temporary reference and because the current
1050   // concurrent copying collector keeps the from-space memory
1051   // intact/accessible until the end of the marking phase (the
1052   // concurrent copying collector may not do so in the future).
1053   __ MaybeUnpoisonHeapReference(temp);
1054   // temp = temp->GetMethodAt(method_offset);
1055   __ movq(temp, Address(temp, method_offset));
1056   // call temp->GetEntryPoint();
1057   __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(
1058       kX86_64PointerSize).SizeValue()));
1059   RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
1060 }
1061 
1062 void CodeGeneratorX86_64::RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke) {
1063   boot_image_method_patches_.emplace_back(
1064       invoke->GetTargetMethod().dex_file, invoke->GetTargetMethod().index);
1065   __ Bind(&boot_image_method_patches_.back().label);
1066 }
1067 
1068 void CodeGeneratorX86_64::RecordMethodBssEntryPatch(HInvokeStaticOrDirect* invoke) {
1069   method_bss_entry_patches_.emplace_back(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex());
1070   __ Bind(&method_bss_entry_patches_.back().label);
1071 }
1072 
1073 void CodeGeneratorX86_64::RecordBootImageTypePatch(HLoadClass* load_class) {
1074   boot_image_type_patches_.emplace_back(
1075       &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
1076   __ Bind(&boot_image_type_patches_.back().label);
1077 }
1078 
1079 Label* CodeGeneratorX86_64::NewTypeBssEntryPatch(HLoadClass* load_class) {
1080   type_bss_entry_patches_.emplace_back(
1081       &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
1082   return &type_bss_entry_patches_.back().label;
1083 }
1084 
1085 void CodeGeneratorX86_64::RecordBootImageStringPatch(HLoadString* load_string) {
1086   boot_image_string_patches_.emplace_back(
1087       &load_string->GetDexFile(), load_string->GetStringIndex().index_);
1088   __ Bind(&boot_image_string_patches_.back().label);
1089 }
1090 
1091 Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) {
1092   DCHECK(!GetCompilerOptions().IsBootImage());
1093   string_bss_entry_patches_.emplace_back(
1094       &load_string->GetDexFile(), load_string->GetStringIndex().index_);
1095   return &string_bss_entry_patches_.back().label;
1096 }
1097 
1098 // The label points to the end of the "movl" or another instruction, but the literal offset
1099 // for the method patch needs to point to the embedded constant, which occupies the last 4 bytes.
1100 constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
1101 
1102 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
1103 inline void CodeGeneratorX86_64::EmitPcRelativeLinkerPatches(
1104     const ArenaDeque<PatchInfo<Label>>& infos,
1105     ArenaVector<linker::LinkerPatch>* linker_patches) {
1106   for (const PatchInfo<Label>& info : infos) {
1107     uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
1108     linker_patches->push_back(
1109         Factory(literal_offset, info.target_dex_file, info.label.Position(), info.offset_or_index));
1110   }
1111 }
1112 
1113 void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
1114   DCHECK(linker_patches->empty());
1115   size_t size =
1116       boot_image_method_patches_.size() +
1117       method_bss_entry_patches_.size() +
1118       boot_image_type_patches_.size() +
1119       type_bss_entry_patches_.size() +
1120       boot_image_string_patches_.size() +
1121       string_bss_entry_patches_.size();
1122   linker_patches->reserve(size);
1123   if (GetCompilerOptions().IsBootImage()) {
1124     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
1125         boot_image_method_patches_, linker_patches);
1126     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
1127         boot_image_type_patches_, linker_patches);
1128     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
1129         boot_image_string_patches_, linker_patches);
1130   } else {
1131     DCHECK(boot_image_method_patches_.empty());
1132     EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>(
1133         boot_image_type_patches_, linker_patches);
1134     EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>(
1135         boot_image_string_patches_, linker_patches);
1136   }
1137   EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
1138       method_bss_entry_patches_, linker_patches);
1139   EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
1140       type_bss_entry_patches_, linker_patches);
1141   EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
1142       string_bss_entry_patches_, linker_patches);
1143   DCHECK_EQ(size, linker_patches->size());
1144 }
1145 
1146 void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const {
1147   stream << Register(reg);
1148 }
1149 
1150 void CodeGeneratorX86_64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
1151   stream << FloatRegister(reg);
1152 }
1153 
1154 size_t CodeGeneratorX86_64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
1155   __ movq(Address(CpuRegister(RSP), stack_index), CpuRegister(reg_id));
1156   return kX86_64WordSize;
1157 }
1158 
1159 size_t CodeGeneratorX86_64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
1160   __ movq(CpuRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1161   return kX86_64WordSize;
1162 }
1163 
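// With SIMD in the graph, spill the full 128-bit XMM register (movups); otherwise only
// the 64-bit double part (movsd).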
1164 size_t CodeGeneratorX86_64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1165   if (GetGraph()->HasSIMD()) {
1166     __ movups(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
1167   } else {
1168     __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
1169   }
1170   return GetFloatingPointSpillSlotSize();
1171 }
1172 
1173 size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1174   if (GetGraph()->HasSIMD()) {
1175     __ movups(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1176   } else {
1177     __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1178   }
1179   return GetFloatingPointSpillSlotSize();
1180 }
1181 
1182 void CodeGeneratorX86_64::InvokeRuntime(QuickEntrypointEnum entrypoint,
1183                                         HInstruction* instruction,
1184                                         uint32_t dex_pc,
1185                                         SlowPathCode* slow_path) {
1186   ValidateInvokeRuntime(entrypoint, instruction, slow_path);
1187   GenerateInvokeRuntime(GetThreadOffset<kX86_64PointerSize>(entrypoint).Int32Value());
1188   if (EntrypointRequiresStackMap(entrypoint)) {
1189     RecordPcInfo(instruction, dex_pc, slow_path);
1190   }
1191 }
1192 
1193 void CodeGeneratorX86_64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
1194                                                               HInstruction* instruction,
1195                                                               SlowPathCode* slow_path) {
1196   ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
1197   GenerateInvokeRuntime(entry_point_offset);
1198 }
1199 
1200 void CodeGeneratorX86_64::GenerateInvokeRuntime(int32_t entry_point_offset) {
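  // Quick entrypoints are reached via their offset from the Thread object, which
  // x86-64 addresses through the GS segment register.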
1201   __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip */ true));
1202 }
1203 
1204 static constexpr int kNumberOfCpuRegisterPairs = 0;
1205 // Use a fake return address register to mimic Quick.
1206 static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1);
1207 CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
1208                                          const X86_64InstructionSetFeatures& isa_features,
1209                                          const CompilerOptions& compiler_options,
1210                                          OptimizingCompilerStats* stats)
1211       : CodeGenerator(graph,
1212                       kNumberOfCpuRegisters,
1213                       kNumberOfFloatRegisters,
1214                       kNumberOfCpuRegisterPairs,
1215                       ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
1216                                           arraysize(kCoreCalleeSaves))
1217                           | (1 << kFakeReturnRegister),
1218                       ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves),
1219                                           arraysize(kFpuCalleeSaves)),
1220                       compiler_options,
1221                       stats),
1222         block_labels_(nullptr),
1223         location_builder_(graph, this),
1224         instruction_visitor_(graph, this),
1225         move_resolver_(graph->GetAllocator(), this),
1226         assembler_(graph->GetAllocator()),
1227         isa_features_(isa_features),
1228         constant_area_start_(0),
1229         boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1230         method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1231         boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1232         type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1233         boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1234         string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1235         jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1236         jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1237         fixups_to_jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
1238   AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
1239 }
1240 
1241 InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph,
1242                                                                CodeGeneratorX86_64* codegen)
1243       : InstructionCodeGenerator(graph, codegen),
1244         assembler_(codegen->GetAssembler()),
1245         codegen_(codegen) {}
1246 
1247 void CodeGeneratorX86_64::SetupBlockedRegisters() const {
1248   // Stack register is always reserved.
1249   blocked_core_registers_[RSP] = true;
1250 
1251   // Block the register used as TMP.
1252   blocked_core_registers_[TMP] = true;
1253 }
1254 
1255 static dwarf::Reg DWARFReg(Register reg) {
1256   return dwarf::Reg::X86_64Core(static_cast<int>(reg));
1257 }
1258 
1259 static dwarf::Reg DWARFReg(FloatRegister reg) {
1260   return dwarf::Reg::X86_64Fp(static_cast<int>(reg));
1261 }
1262 
1263 void CodeGeneratorX86_64::GenerateFrameEntry() {
1264   __ cfi().SetCurrentCFAOffset(kX86_64WordSize);  // return address
1265   __ Bind(&frame_entry_label_);
1266   bool skip_overflow_check = IsLeafMethod()
1267       && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
1268   DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
1269 
1270   if (GetCompilerOptions().CountHotnessInCompiledCode()) {
1271     __ addw(Address(CpuRegister(kMethodRegisterArgument),
1272                     ArtMethod::HotnessCountOffset().Int32Value()),
1273             Immediate(1));
1274   }
1275 
1276   if (!skip_overflow_check) {
1277     size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86_64);
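    // Implicit stack overflow check: probe the stack at the reserved limit; if the
    // guard region has been reached, this load faults and the fault handler throws
    // StackOverflowError.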
1278     __ testq(CpuRegister(RAX), Address(CpuRegister(RSP), -static_cast<int32_t>(reserved_bytes)));
1279     RecordPcInfo(nullptr, 0);
1280   }
1281 
1282   if (HasEmptyFrame()) {
1283     return;
1284   }
1285 
1286   for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
1287     Register reg = kCoreCalleeSaves[i];
1288     if (allocated_registers_.ContainsCoreRegister(reg)) {
1289       __ pushq(CpuRegister(reg));
1290       __ cfi().AdjustCFAOffset(kX86_64WordSize);
1291       __ cfi().RelOffset(DWARFReg(reg), 0);
1292     }
1293   }
1294 
1295   int adjust = GetFrameSize() - GetCoreSpillSize();
1296   __ subq(CpuRegister(RSP), Immediate(adjust));
1297   __ cfi().AdjustCFAOffset(adjust);
1298   uint32_t xmm_spill_location = GetFpuSpillStart();
1299   size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize();
1300 
1301   for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
1302     if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
1303       int offset = xmm_spill_location + (xmm_spill_slot_size * i);
1304       __ movsd(Address(CpuRegister(RSP), offset), XmmRegister(kFpuCalleeSaves[i]));
1305       __ cfi().RelOffset(DWARFReg(kFpuCalleeSaves[i]), offset);
1306     }
1307   }
1308 
1309   // Save the current method if we need it. Note that we do not
1310   // do this in HCurrentMethod, as the instruction might have been removed
1311   // in the SSA graph.
1312   if (RequiresCurrentMethod()) {
1313     __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset),
1314             CpuRegister(kMethodRegisterArgument));
1315   }
1316 
1317   if (GetGraph()->HasShouldDeoptimizeFlag()) {
1318     // Initialize should_deoptimize flag to 0.
1319     __ movl(Address(CpuRegister(RSP), GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
1320   }
1321 }
1322 
1323 void CodeGeneratorX86_64::GenerateFrameExit() {
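  // Bracket the epilogue with RememberState/RestoreState so that CFI for any code
  // emitted after the `ret` still describes the full frame.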
1324   __ cfi().RememberState();
1325   if (!HasEmptyFrame()) {
1326     uint32_t xmm_spill_location = GetFpuSpillStart();
1327     size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize();
1328     for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
1329       if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
1330         int offset = xmm_spill_location + (xmm_spill_slot_size * i);
1331         __ movsd(XmmRegister(kFpuCalleeSaves[i]), Address(CpuRegister(RSP), offset));
1332         __ cfi().Restore(DWARFReg(kFpuCalleeSaves[i]));
1333       }
1334     }
1335 
1336     int adjust = GetFrameSize() - GetCoreSpillSize();
1337     __ addq(CpuRegister(RSP), Immediate(adjust));
1338     __ cfi().AdjustCFAOffset(-adjust);
1339 
1340     for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
1341       Register reg = kCoreCalleeSaves[i];
1342       if (allocated_registers_.ContainsCoreRegister(reg)) {
1343         __ popq(CpuRegister(reg));
1344         __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize));
1345         __ cfi().Restore(DWARFReg(reg));
1346       }
1347     }
1348   }
1349   __ ret();
1350   __ cfi().RestoreState();
1351   __ cfi().DefCFAOffset(GetFrameSize());
1352 }
1353 
1354 void CodeGeneratorX86_64::Bind(HBasicBlock* block) {
1355   __ Bind(GetLabelOf(block));
1356 }
1357 
1358 void CodeGeneratorX86_64::Move(Location destination, Location source) {
1359   if (source.Equals(destination)) {
1360     return;
1361   }
1362   if (destination.IsRegister()) {
1363     CpuRegister dest = destination.AsRegister<CpuRegister>();
1364     if (source.IsRegister()) {
1365       __ movq(dest, source.AsRegister<CpuRegister>());
1366     } else if (source.IsFpuRegister()) {
1367       __ movd(dest, source.AsFpuRegister<XmmRegister>());
1368     } else if (source.IsStackSlot()) {
1369       __ movl(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1370     } else if (source.IsConstant()) {
1371       HConstant* constant = source.GetConstant();
1372       if (constant->IsLongConstant()) {
1373         Load64BitValue(dest, constant->AsLongConstant()->GetValue());
1374       } else {
1375         Load32BitValue(dest, GetInt32ValueOf(constant));
1376       }
1377     } else {
1378       DCHECK(source.IsDoubleStackSlot());
1379       __ movq(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1380     }
1381   } else if (destination.IsFpuRegister()) {
1382     XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
1383     if (source.IsRegister()) {
1384       __ movd(dest, source.AsRegister<CpuRegister>());
1385     } else if (source.IsFpuRegister()) {
1386       __ movaps(dest, source.AsFpuRegister<XmmRegister>());
1387     } else if (source.IsConstant()) {
1388       HConstant* constant = source.GetConstant();
1389       int64_t value = CodeGenerator::GetInt64ValueOf(constant);
1390       if (constant->IsFloatConstant()) {
1391         Load32BitValue(dest, static_cast<int32_t>(value));
1392       } else {
1393         Load64BitValue(dest, value);
1394       }
1395     } else if (source.IsStackSlot()) {
1396       __ movss(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1397     } else {
1398       DCHECK(source.IsDoubleStackSlot());
1399       __ movsd(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1400     }
1401   } else if (destination.IsStackSlot()) {
1402     if (source.IsRegister()) {
1403       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
1404               source.AsRegister<CpuRegister>());
1405     } else if (source.IsFpuRegister()) {
1406       __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
1407                source.AsFpuRegister<XmmRegister>());
1408     } else if (source.IsConstant()) {
1409       HConstant* constant = source.GetConstant();
1410       int32_t value = GetInt32ValueOf(constant);
1411       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
1412     } else {
1413       DCHECK(source.IsStackSlot()) << source;
1414       __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
1415       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
1416     }
1417   } else {
1418     DCHECK(destination.IsDoubleStackSlot());
1419     if (source.IsRegister()) {
1420       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
1421               source.AsRegister<CpuRegister>());
1422     } else if (source.IsFpuRegister()) {
1423       __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
1424                source.AsFpuRegister<XmmRegister>());
1425     } else if (source.IsConstant()) {
1426       HConstant* constant = source.GetConstant();
1427       DCHECK(constant->IsLongConstant() || constant->IsDoubleConstant());
1428       int64_t value = GetInt64ValueOf(constant);
1429       Store64BitValueToStack(destination, value);
1430     } else {
1431       DCHECK(source.IsDoubleStackSlot());
1432       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
1433       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
1434     }
1435   }
1436 }
1437 
1438 void CodeGeneratorX86_64::MoveConstant(Location location, int32_t value) {
1439   DCHECK(location.IsRegister());
1440   Load64BitValue(location.AsRegister<CpuRegister>(), static_cast<int64_t>(value));
1441 }
1442 
1443 void CodeGeneratorX86_64::MoveLocation(
1444     Location dst, Location src, DataType::Type dst_type ATTRIBUTE_UNUSED) {
1445   Move(dst, src);
1446 }
1447 
1448 void CodeGeneratorX86_64::AddLocationAsTemp(Location location, LocationSummary* locations) {
1449   if (location.IsRegister()) {
1450     locations->AddTemp(location);
1451   } else {
1452     UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
1453   }
1454 }
1455 
1456 void InstructionCodeGeneratorX86_64::HandleGoto(HInstruction* got, HBasicBlock* successor) {
1457   if (successor->IsExitBlock()) {
1458     DCHECK(got->GetPrevious()->AlwaysThrows());
1459     return;  // no code needed
1460   }
1461 
1462   HBasicBlock* block = got->GetBlock();
1463   HInstruction* previous = got->GetPrevious();
1464 
1465   HLoopInformation* info = block->GetLoopInformation();
1466   if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
1467     if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) {
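      // On a back edge, reload the current ArtMethod* from the frame and bump its
      // hotness counter.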
1468       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), 0));
1469       __ addw(Address(CpuRegister(TMP), ArtMethod::HotnessCountOffset().Int32Value()),
1470               Immediate(1));
1471     }
1472     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
1473     return;
1474   }
1475 
1476   if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
1477     GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
1478   }
1479   if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
1480     __ jmp(codegen_->GetLabelOf(successor));
1481   }
1482 }
1483 
1484 void LocationsBuilderX86_64::VisitGoto(HGoto* got) {
1485   got->SetLocations(nullptr);
1486 }
1487 
1488 void InstructionCodeGeneratorX86_64::VisitGoto(HGoto* got) {
1489   HandleGoto(got, got->GetSuccessor());
1490 }
1491 
1492 void LocationsBuilderX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
1493   try_boundary->SetLocations(nullptr);
1494 }
1495 
1496 void InstructionCodeGeneratorX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
1497   HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
1498   if (!successor->IsExitBlock()) {
1499     HandleGoto(try_boundary, successor);
1500   }
1501 }
1502 
1503 void LocationsBuilderX86_64::VisitExit(HExit* exit) {
1504   exit->SetLocations(nullptr);
1505 }
1506 
1507 void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
1508 }
1509 
1510 template<class LabelType>
1511 void InstructionCodeGeneratorX86_64::GenerateFPJumps(HCondition* cond,
1512                                                      LabelType* true_label,
1513                                                      LabelType* false_label) {
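  // ucomiss/ucomisd report "unordered" when either operand is NaN, so dispatch the
  // NaN case first according to the condition's bias.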
1514   if (cond->IsFPConditionTrueIfNaN()) {
1515     __ j(kUnordered, true_label);
1516   } else if (cond->IsFPConditionFalseIfNaN()) {
1517     __ j(kUnordered, false_label);
1518   }
1519   __ j(X86_64FPCondition(cond->GetCondition()), true_label);
1520 }
1521 
1522 void InstructionCodeGeneratorX86_64::GenerateCompareTest(HCondition* condition) {
1523   LocationSummary* locations = condition->GetLocations();
1524 
1525   Location left = locations->InAt(0);
1526   Location right = locations->InAt(1);
1527   DataType::Type type = condition->InputAt(0)->GetType();
1528   switch (type) {
1529     case DataType::Type::kBool:
1530     case DataType::Type::kUint8:
1531     case DataType::Type::kInt8:
1532     case DataType::Type::kUint16:
1533     case DataType::Type::kInt16:
1534     case DataType::Type::kInt32:
1535     case DataType::Type::kReference: {
1536       codegen_->GenerateIntCompare(left, right);
1537       break;
1538     }
1539     case DataType::Type::kInt64: {
1540       codegen_->GenerateLongCompare(left, right);
1541       break;
1542     }
1543     case DataType::Type::kFloat32: {
1544       if (right.IsFpuRegister()) {
1545         __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
1546       } else if (right.IsConstant()) {
1547         __ ucomiss(left.AsFpuRegister<XmmRegister>(),
1548                    codegen_->LiteralFloatAddress(
1549                      right.GetConstant()->AsFloatConstant()->GetValue()));
1550       } else {
1551         DCHECK(right.IsStackSlot());
1552         __ ucomiss(left.AsFpuRegister<XmmRegister>(),
1553                    Address(CpuRegister(RSP), right.GetStackIndex()));
1554       }
1555       break;
1556     }
1557     case DataType::Type::kFloat64: {
1558       if (right.IsFpuRegister()) {
1559         __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
1560       } else if (right.IsConstant()) {
1561         __ ucomisd(left.AsFpuRegister<XmmRegister>(),
1562                    codegen_->LiteralDoubleAddress(
1563                      right.GetConstant()->AsDoubleConstant()->GetValue()));
1564       } else {
1565         DCHECK(right.IsDoubleStackSlot());
1566         __ ucomisd(left.AsFpuRegister<XmmRegister>(),
1567                    Address(CpuRegister(RSP), right.GetStackIndex()));
1568       }
1569       break;
1570     }
1571     default:
1572       LOG(FATAL) << "Unexpected condition type " << type;
1573   }
1574 }
1575 
1576 template<class LabelType>
1577 void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* condition,
1578                                                                   LabelType* true_target_in,
1579                                                                   LabelType* false_target_in) {
1580   // Generated branching requires both targets to be explicit. If either of the
1581   // targets is nullptr (fallthrough), use and bind `fallthrough_target` instead.
1582   LabelType fallthrough_target;
1583   LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
1584   LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
1585 
1586   // Generate the comparison to set the CC.
1587   GenerateCompareTest(condition);
1588 
1589   // Now generate the correct jump(s).
1590   DataType::Type type = condition->InputAt(0)->GetType();
1591   switch (type) {
1592     case DataType::Type::kInt64: {
1593       __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
1594       break;
1595     }
1596     case DataType::Type::kFloat32: {
1597       GenerateFPJumps(condition, true_target, false_target);
1598       break;
1599     }
1600     case DataType::Type::kFloat64: {
1601       GenerateFPJumps(condition, true_target, false_target);
1602       break;
1603     }
1604     default:
1605       LOG(FATAL) << "Unexpected condition type " << type;
1606   }
1607 
1608   if (false_target != &fallthrough_target) {
1609     __ jmp(false_target);
1610   }
1611 
1612   if (fallthrough_target.IsLinked()) {
1613     __ Bind(&fallthrough_target);
1614   }
1615 }
1616 
1617 static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) {
1618   // Moves may affect the eflags register (moving zero uses xorl), so the EFLAGS
1619   // are only valid if `cond` is the instruction immediately before `branch`. We can't
1620   // use the eflags on long conditions if they are materialized due to the complex branching.
1621   return cond->IsCondition() &&
1622          cond->GetNext() == branch &&
1623          !DataType::IsFloatingPointType(cond->InputAt(0)->GetType());
1624 }
1625 
1626 template<class LabelType>
1627 void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruction,
1628                                                            size_t condition_input_index,
1629                                                            LabelType* true_target,
1630                                                            LabelType* false_target) {
1631   HInstruction* cond = instruction->InputAt(condition_input_index);
1632 
1633   if (true_target == nullptr && false_target == nullptr) {
1634     // Nothing to do. The code always falls through.
1635     return;
1636   } else if (cond->IsIntConstant()) {
1637     // Constant condition, statically compared against "true" (integer value 1).
1638     if (cond->AsIntConstant()->IsTrue()) {
1639       if (true_target != nullptr) {
1640         __ jmp(true_target);
1641       }
1642     } else {
1643       DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
1644       if (false_target != nullptr) {
1645         __ jmp(false_target);
1646       }
1647     }
1648     return;
1649   }
1650 
1651   // The following code generates these patterns:
1652   //  (1) true_target == nullptr && false_target != nullptr
1653   //        - opposite condition true => branch to false_target
1654   //  (2) true_target != nullptr && false_target == nullptr
1655   //        - condition true => branch to true_target
1656   //  (3) true_target != nullptr && false_target != nullptr
1657   //        - condition true => branch to true_target
1658   //        - branch to false_target
1659   if (IsBooleanValueOrMaterializedCondition(cond)) {
1660     if (AreEflagsSetFrom(cond, instruction)) {
1661       if (true_target == nullptr) {
1662         __ j(X86_64IntegerCondition(cond->AsCondition()->GetOppositeCondition()), false_target);
1663       } else {
1664         __ j(X86_64IntegerCondition(cond->AsCondition()->GetCondition()), true_target);
1665       }
1666     } else {
1667       // Materialized condition, compare against 0.
1668       Location lhs = instruction->GetLocations()->InAt(condition_input_index);
1669       if (lhs.IsRegister()) {
1670         __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
1671       } else {
1672         __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0));
1673       }
1674       if (true_target == nullptr) {
1675         __ j(kEqual, false_target);
1676       } else {
1677         __ j(kNotEqual, true_target);
1678       }
1679     }
1680   } else {
1681     // Condition has not been materialized, use its inputs as the
1682     // comparison and its condition as the branch condition.
1683     HCondition* condition = cond->AsCondition();
1684 
1685     // If this is a long or FP comparison that has been folded into
1686     // the HCondition, generate the comparison directly.
1687     DataType::Type type = condition->InputAt(0)->GetType();
1688     if (type == DataType::Type::kInt64 || DataType::IsFloatingPointType(type)) {
1689       GenerateCompareTestAndBranch(condition, true_target, false_target);
1690       return;
1691     }
1692 
1693     Location lhs = condition->GetLocations()->InAt(0);
1694     Location rhs = condition->GetLocations()->InAt(1);
1695     codegen_->GenerateIntCompare(lhs, rhs);
1696     if (true_target == nullptr) {
1697       __ j(X86_64IntegerCondition(condition->GetOppositeCondition()), false_target);
1698     } else {
1699       __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
1700     }
1701   }
1702 
1703   // If neither branch falls through (case 3), the conditional branch to `true_target`
1704   // was already emitted (case 2) and we need to emit a jump to `false_target`.
1705   if (true_target != nullptr && false_target != nullptr) {
1706     __ jmp(false_target);
1707   }
1708 }
1709 
1710 void LocationsBuilderX86_64::VisitIf(HIf* if_instr) {
1711   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
1712   if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
1713     locations->SetInAt(0, Location::Any());
1714   }
1715 }
1716 
1717 void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
1718   HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
1719   HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
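  // A null target marks a fallthrough edge: the successor is the next block in the
  // code layout, so no jump is emitted for it.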
1720   Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
1721       nullptr : codegen_->GetLabelOf(true_successor);
1722   Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
1723       nullptr : codegen_->GetLabelOf(false_successor);
1724   GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
1725 }
1726 
1727 void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
1728   LocationSummary* locations = new (GetGraph()->GetAllocator())
1729       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
1730   InvokeRuntimeCallingConvention calling_convention;
1731   RegisterSet caller_saves = RegisterSet::Empty();
1732   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1733   locations->SetCustomSlowPathCallerSaves(caller_saves);
1734   if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
1735     locations->SetInAt(0, Location::Any());
1736   }
1737 }
1738 
1739 void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
1740   SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86_64>(deoptimize);
1741   GenerateTestAndBranch<Label>(deoptimize,
1742                                /* condition_input_index */ 0,
1743                                slow_path->GetEntryLabel(),
1744                                /* false_target */ nullptr);
1745 }
1746 
1747 void LocationsBuilderX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
1748   LocationSummary* locations = new (GetGraph()->GetAllocator())
1749       LocationSummary(flag, LocationSummary::kNoCall);
1750   locations->SetOut(Location::RequiresRegister());
1751 }
1752 
1753 void InstructionCodeGeneratorX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
1754   __ movl(flag->GetLocations()->Out().AsRegister<CpuRegister>(),
1755           Address(CpuRegister(RSP), codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
1756 }
1757 
1758 static bool SelectCanUseCMOV(HSelect* select) {
1759   // There are no conditional move instructions for XMMs.
1760   if (DataType::IsFloatingPointType(select->GetType())) {
1761     return false;
1762   }
1763 
1764   // A FP condition doesn't generate the single CC that we need.
1765   HInstruction* condition = select->GetCondition();
1766   if (condition->IsCondition() &&
1767       DataType::IsFloatingPointType(condition->InputAt(0)->GetType())) {
1768     return false;
1769   }
1770 
1771   // We can generate a CMOV for this Select.
1772   return true;
1773 }
1774 
1775 void LocationsBuilderX86_64::VisitSelect(HSelect* select) {
1776   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select);
1777   if (DataType::IsFloatingPointType(select->GetType())) {
1778     locations->SetInAt(0, Location::RequiresFpuRegister());
1779     locations->SetInAt(1, Location::Any());
1780   } else {
1781     locations->SetInAt(0, Location::RequiresRegister());
1782     if (SelectCanUseCMOV(select)) {
1783       if (select->InputAt(1)->IsConstant()) {
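        // CMOV has no immediate form, so a constant input must be materialized in a
        // register.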
1784         locations->SetInAt(1, Location::RequiresRegister());
1785       } else {
1786         locations->SetInAt(1, Location::Any());
1787       }
1788     } else {
1789       locations->SetInAt(1, Location::Any());
1790     }
1791   }
1792   if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
1793     locations->SetInAt(2, Location::RequiresRegister());
1794   }
1795   locations->SetOut(Location::SameAsFirstInput());
1796 }
1797 
1798 void InstructionCodeGeneratorX86_64::VisitSelect(HSelect* select) {
1799   LocationSummary* locations = select->GetLocations();
1800   if (SelectCanUseCMOV(select)) {
1801     // If both the condition and the source types are integer, we can generate
1802     // a CMOV to implement Select.
1803     CpuRegister value_false = locations->InAt(0).AsRegister<CpuRegister>();
1804     Location value_true_loc = locations->InAt(1);
1805     DCHECK(locations->InAt(0).Equals(locations->Out()));
1806 
1807     HInstruction* select_condition = select->GetCondition();
1808     Condition cond = kNotEqual;
1809 
1810     // Figure out how to test the 'condition'.
1811     if (select_condition->IsCondition()) {
1812       HCondition* condition = select_condition->AsCondition();
1813       if (!condition->IsEmittedAtUseSite()) {
1814         // This was a previously materialized condition.
1815         // Can we use the existing condition code?
1816         if (AreEflagsSetFrom(condition, select)) {
1817           // Materialization was the previous instruction.  Condition codes are right.
1818           cond = X86_64IntegerCondition(condition->GetCondition());
1819         } else {
1820           // No, we have to recreate the condition code.
1821           CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
1822           __ testl(cond_reg, cond_reg);
1823         }
1824       } else {
1825         GenerateCompareTest(condition);
1826         cond = X86_64IntegerCondition(condition->GetCondition());
1827       }
1828     } else {
1829       // Must be a Boolean condition, which needs to be compared to 0.
1830       CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
1831       __ testl(cond_reg, cond_reg);
1832     }
1833 
1834     // If the condition is true, overwrite the output, which already contains false.
1835     // Generate the correct sized CMOV.
1836     bool is_64_bit = DataType::Is64BitType(select->GetType());
1837     if (value_true_loc.IsRegister()) {
1838       __ cmov(cond, value_false, value_true_loc.AsRegister<CpuRegister>(), is_64_bit);
1839     } else {
1840       __ cmov(cond,
1841               value_false,
1842               Address(CpuRegister(RSP), value_true_loc.GetStackIndex()), is_64_bit);
1843     }
1844   } else {
1845     NearLabel false_target;
1846     GenerateTestAndBranch<NearLabel>(select,
1847                                      /* condition_input_index */ 2,
1848                                      /* true_target */ nullptr,
1849                                      &false_target);
1850     codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
1851     __ Bind(&false_target);
1852   }
1853 }
1854 
1855 void LocationsBuilderX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
1856   new (GetGraph()->GetAllocator()) LocationSummary(info);
1857 }
1858 
1859 void InstructionCodeGeneratorX86_64::VisitNativeDebugInfo(HNativeDebugInfo*) {
1860   // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
1861 }
1862 
1863 void CodeGeneratorX86_64::GenerateNop() {
1864   __ nop();
1865 }
1866 
1867 void LocationsBuilderX86_64::HandleCondition(HCondition* cond) {
1868   LocationSummary* locations =
1869       new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall);
1870   // Handle the long/FP comparisons made in instruction simplification.
1871   switch (cond->InputAt(0)->GetType()) {
1872     case DataType::Type::kInt64:
1873       locations->SetInAt(0, Location::RequiresRegister());
1874       locations->SetInAt(1, Location::Any());
1875       break;
1876     case DataType::Type::kFloat32:
1877     case DataType::Type::kFloat64:
1878       locations->SetInAt(0, Location::RequiresFpuRegister());
1879       locations->SetInAt(1, Location::Any());
1880       break;
1881     default:
1882       locations->SetInAt(0, Location::RequiresRegister());
1883       locations->SetInAt(1, Location::Any());
1884       break;
1885   }
1886   if (!cond->IsEmittedAtUseSite()) {
1887     locations->SetOut(Location::RequiresRegister());
1888   }
1889 }
1890 
1891 void InstructionCodeGeneratorX86_64::HandleCondition(HCondition* cond) {
1892   if (cond->IsEmittedAtUseSite()) {
1893     return;
1894   }
1895 
1896   LocationSummary* locations = cond->GetLocations();
1897   Location lhs = locations->InAt(0);
1898   Location rhs = locations->InAt(1);
1899   CpuRegister reg = locations->Out().AsRegister<CpuRegister>();
1900   NearLabel true_label, false_label;
1901 
1902   switch (cond->InputAt(0)->GetType()) {
1903     default:
1904       // Integer case.
1905 
1906       // Clear output register: setcc only sets the low byte.
1907       __ xorl(reg, reg);
1908 
1909       codegen_->GenerateIntCompare(lhs, rhs);
1910       __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
1911       return;
1912     case DataType::Type::kInt64:
1913       // Clear output register: setcc only sets the low byte.
1914       __ xorl(reg, reg);
1915 
1916       codegen_->GenerateLongCompare(lhs, rhs);
1917       __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
1918       return;
1919     case DataType::Type::kFloat32: {
1920       XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
1921       if (rhs.IsConstant()) {
1922         float value = rhs.GetConstant()->AsFloatConstant()->GetValue();
1923         __ ucomiss(lhs_reg, codegen_->LiteralFloatAddress(value));
1924       } else if (rhs.IsStackSlot()) {
1925         __ ucomiss(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
1926       } else {
1927         __ ucomiss(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
1928       }
1929       GenerateFPJumps(cond, &true_label, &false_label);
1930       break;
1931     }
1932     case DataType::Type::kFloat64: {
1933       XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
1934       if (rhs.IsConstant()) {
1935         double value = rhs.GetConstant()->AsDoubleConstant()->GetValue();
1936         __ ucomisd(lhs_reg, codegen_->LiteralDoubleAddress(value));
1937       } else if (rhs.IsDoubleStackSlot()) {
1938         __ ucomisd(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
1939       } else {
1940         __ ucomisd(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
1941       }
1942       GenerateFPJumps(cond, &true_label, &false_label);
1943       break;
1944     }
1945   }
1946 
1947   // Convert the jumps into the result.
1948   NearLabel done_label;
1949 
1950   // False case: result = 0.
1951   __ Bind(&false_label);
1952   __ xorl(reg, reg);
1953   __ jmp(&done_label);
1954 
1955   // True case: result = 1.
1956   __ Bind(&true_label);
1957   __ movl(reg, Immediate(1));
1958   __ Bind(&done_label);
1959 }
1960 
1961 void LocationsBuilderX86_64::VisitEqual(HEqual* comp) {
1962   HandleCondition(comp);
1963 }
1964 
1965 void InstructionCodeGeneratorX86_64::VisitEqual(HEqual* comp) {
1966   HandleCondition(comp);
1967 }
1968 
1969 void LocationsBuilderX86_64::VisitNotEqual(HNotEqual* comp) {
1970   HandleCondition(comp);
1971 }
1972 
1973 void InstructionCodeGeneratorX86_64::VisitNotEqual(HNotEqual* comp) {
1974   HandleCondition(comp);
1975 }
1976 
1977 void LocationsBuilderX86_64::VisitLessThan(HLessThan* comp) {
1978   HandleCondition(comp);
1979 }
1980 
1981 void InstructionCodeGeneratorX86_64::VisitLessThan(HLessThan* comp) {
1982   HandleCondition(comp);
1983 }
1984 
1985 void LocationsBuilderX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
1986   HandleCondition(comp);
1987 }
1988 
1989 void InstructionCodeGeneratorX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
1990   HandleCondition(comp);
1991 }
1992 
1993 void LocationsBuilderX86_64::VisitGreaterThan(HGreaterThan* comp) {
1994   HandleCondition(comp);
1995 }
1996 
1997 void InstructionCodeGeneratorX86_64::VisitGreaterThan(HGreaterThan* comp) {
1998   HandleCondition(comp);
1999 }
2000 
2001 void LocationsBuilderX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2002   HandleCondition(comp);
2003 }
2004 
2005 void InstructionCodeGeneratorX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2006   HandleCondition(comp);
2007 }
2008 
2009 void LocationsBuilderX86_64::VisitBelow(HBelow* comp) {
2010   HandleCondition(comp);
2011 }
2012 
2013 void InstructionCodeGeneratorX86_64::VisitBelow(HBelow* comp) {
2014   HandleCondition(comp);
2015 }
2016 
2017 void LocationsBuilderX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
2018   HandleCondition(comp);
2019 }
2020 
2021 void InstructionCodeGeneratorX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
2022   HandleCondition(comp);
2023 }
2024 
2025 void LocationsBuilderX86_64::VisitAbove(HAbove* comp) {
2026   HandleCondition(comp);
2027 }
2028 
2029 void InstructionCodeGeneratorX86_64::VisitAbove(HAbove* comp) {
2030   HandleCondition(comp);
2031 }
2032 
2033 void LocationsBuilderX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
2034   HandleCondition(comp);
2035 }
2036 
2037 void InstructionCodeGeneratorX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
2038   HandleCondition(comp);
2039 }
2040 
2041 void LocationsBuilderX86_64::VisitCompare(HCompare* compare) {
2042   LocationSummary* locations =
2043       new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
2044   switch (compare->InputAt(0)->GetType()) {
2045     case DataType::Type::kBool:
2046     case DataType::Type::kUint8:
2047     case DataType::Type::kInt8:
2048     case DataType::Type::kUint16:
2049     case DataType::Type::kInt16:
2050     case DataType::Type::kInt32:
2051     case DataType::Type::kInt64: {
2052       locations->SetInAt(0, Location::RequiresRegister());
2053       locations->SetInAt(1, Location::Any());
2054       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2055       break;
2056     }
2057     case DataType::Type::kFloat32:
2058     case DataType::Type::kFloat64: {
2059       locations->SetInAt(0, Location::RequiresFpuRegister());
2060       locations->SetInAt(1, Location::Any());
2061       locations->SetOut(Location::RequiresRegister());
2062       break;
2063     }
2064     default:
2065       LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
2066   }
2067 }
2068 
2069 void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) {
2070   LocationSummary* locations = compare->GetLocations();
2071   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
2072   Location left = locations->InAt(0);
2073   Location right = locations->InAt(1);
2074 
2075   NearLabel less, greater, done;
2076   DataType::Type type = compare->InputAt(0)->GetType();
2077   Condition less_cond = kLess;
2078 
2079   switch (type) {
2080     case DataType::Type::kBool:
2081     case DataType::Type::kUint8:
2082     case DataType::Type::kInt8:
2083     case DataType::Type::kUint16:
2084     case DataType::Type::kInt16:
2085     case DataType::Type::kInt32: {
2086       codegen_->GenerateIntCompare(left, right);
2087       break;
2088     }
2089     case DataType::Type::kInt64: {
2090       codegen_->GenerateLongCompare(left, right);
2091       break;
2092     }
2093     case DataType::Type::kFloat32: {
2094       XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
2095       if (right.IsConstant()) {
2096         float value = right.GetConstant()->AsFloatConstant()->GetValue();
2097         __ ucomiss(left_reg, codegen_->LiteralFloatAddress(value));
2098       } else if (right.IsStackSlot()) {
2099         __ ucomiss(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
2100       } else {
2101         __ ucomiss(left_reg, right.AsFpuRegister<XmmRegister>());
2102       }
2103       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
2104       less_cond = kBelow;  //  ucomis{s,d} sets CF
2105       break;
2106     }
2107     case DataType::Type::kFloat64: {
2108       XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
2109       if (right.IsConstant()) {
2110         double value = right.GetConstant()->AsDoubleConstant()->GetValue();
2111         __ ucomisd(left_reg, codegen_->LiteralDoubleAddress(value));
2112       } else if (right.IsDoubleStackSlot()) {
2113         __ ucomisd(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
2114       } else {
2115         __ ucomisd(left_reg, right.AsFpuRegister<XmmRegister>());
2116       }
2117       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
2118       less_cond = kBelow;  //  ucomis{s,d} sets CF
2119       break;
2120     }
2121     default:
2122       LOG(FATAL) << "Unexpected compare type " << type;
2123   }
2124 
2125   __ movl(out, Immediate(0));
2126   __ j(kEqual, &done);
2127   __ j(less_cond, &less);
2128 
2129   __ Bind(&greater);
2130   __ movl(out, Immediate(1));
2131   __ jmp(&done);
2132 
2133   __ Bind(&less);
2134   __ movl(out, Immediate(-1));
2135 
2136   __ Bind(&done);
2137 }
2138 
2139 void LocationsBuilderX86_64::VisitIntConstant(HIntConstant* constant) {
2140   LocationSummary* locations =
2141       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2142   locations->SetOut(Location::ConstantLocation(constant));
2143 }
2144 
2145 void InstructionCodeGeneratorX86_64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
2146   // Will be generated at use site.
2147 }
2148 
2149 void LocationsBuilderX86_64::VisitNullConstant(HNullConstant* constant) {
2150   LocationSummary* locations =
2151       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2152   locations->SetOut(Location::ConstantLocation(constant));
2153 }
2154 
2155 void InstructionCodeGeneratorX86_64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
2156   // Will be generated at use site.
2157 }
2158 
2159 void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) {
2160   LocationSummary* locations =
2161       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2162   locations->SetOut(Location::ConstantLocation(constant));
2163 }
2164 
2165 void InstructionCodeGeneratorX86_64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
2166   // Will be generated at use site.
2167 }
2168 
2169 void LocationsBuilderX86_64::VisitFloatConstant(HFloatConstant* constant) {
2170   LocationSummary* locations =
2171       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2172   locations->SetOut(Location::ConstantLocation(constant));
2173 }
2174 
2175 void InstructionCodeGeneratorX86_64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
2176   // Will be generated at use site.
2177 }
2178 
2179 void LocationsBuilderX86_64::VisitDoubleConstant(HDoubleConstant* constant) {
2180   LocationSummary* locations =
2181       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2182   locations->SetOut(Location::ConstantLocation(constant));
2183 }
2184 
2185 void InstructionCodeGeneratorX86_64::VisitDoubleConstant(
2186     HDoubleConstant* constant ATTRIBUTE_UNUSED) {
2187   // Will be generated at use site.
2188 }
2189 
2190 void LocationsBuilderX86_64::VisitConstructorFence(HConstructorFence* constructor_fence) {
2191   constructor_fence->SetLocations(nullptr);
2192 }
2193 
2194 void InstructionCodeGeneratorX86_64::VisitConstructorFence(
2195     HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
2196   codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2197 }
2198 
2199 void LocationsBuilderX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2200   memory_barrier->SetLocations(nullptr);
2201 }
2202 
2203 void InstructionCodeGeneratorX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2204   codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
2205 }
2206 
2207 void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) {
2208   ret->SetLocations(nullptr);
2209 }
2210 
2211 void InstructionCodeGeneratorX86_64::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) {
2212   codegen_->GenerateFrameExit();
2213 }
2214 
2215 void LocationsBuilderX86_64::VisitReturn(HReturn* ret) {
2216   LocationSummary* locations =
2217       new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall);
2218   switch (ret->InputAt(0)->GetType()) {
2219     case DataType::Type::kReference:
2220     case DataType::Type::kBool:
2221     case DataType::Type::kUint8:
2222     case DataType::Type::kInt8:
2223     case DataType::Type::kUint16:
2224     case DataType::Type::kInt16:
2225     case DataType::Type::kInt32:
2226     case DataType::Type::kInt64:
2227       locations->SetInAt(0, Location::RegisterLocation(RAX));
2228       break;
2229 
2230     case DataType::Type::kFloat32:
2231     case DataType::Type::kFloat64:
2232       locations->SetInAt(0, Location::FpuRegisterLocation(XMM0));
2233       break;
2234 
2235     default:
2236       LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
2237   }
2238 }
2239 
2240 void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) {
2241   if (kIsDebugBuild) {
2242     switch (ret->InputAt(0)->GetType()) {
2243       case DataType::Type::kReference:
2244       case DataType::Type::kBool:
2245       case DataType::Type::kUint8:
2246       case DataType::Type::kInt8:
2247       case DataType::Type::kUint16:
2248       case DataType::Type::kInt16:
2249       case DataType::Type::kInt32:
2250       case DataType::Type::kInt64:
2251         DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<CpuRegister>().AsRegister(), RAX);
2252         break;
2253 
2254       case DataType::Type::kFloat32:
2255       case DataType::Type::kFloat64:
2256         DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(),
2257                   XMM0);
2258         break;
2259 
2260       default:
2261         LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
2262     }
2263   }
2264   codegen_->GenerateFrameExit();
2265 }
2266 
2267 Location InvokeDexCallingConventionVisitorX86_64::GetReturnLocation(DataType::Type type) const {
2268   switch (type) {
2269     case DataType::Type::kReference:
2270     case DataType::Type::kBool:
2271     case DataType::Type::kUint8:
2272     case DataType::Type::kInt8:
2273     case DataType::Type::kUint16:
2274     case DataType::Type::kInt16:
2275     case DataType::Type::kUint32:
2276     case DataType::Type::kInt32:
2277     case DataType::Type::kUint64:
2278     case DataType::Type::kInt64:
2279       return Location::RegisterLocation(RAX);
2280 
2281     case DataType::Type::kVoid:
2282       return Location::NoLocation();
2283 
2284     case DataType::Type::kFloat64:
2285     case DataType::Type::kFloat32:
2286       return Location::FpuRegisterLocation(XMM0);
2287   }
2288 
2289   UNREACHABLE();
2290 }
2291 
2292 Location InvokeDexCallingConventionVisitorX86_64::GetMethodLocation() const {
2293   return Location::RegisterLocation(kMethodRegisterArgument);
2294 }
2295 
2296 Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(DataType::Type type) {
2297   switch (type) {
2298     case DataType::Type::kReference:
2299     case DataType::Type::kBool:
2300     case DataType::Type::kUint8:
2301     case DataType::Type::kInt8:
2302     case DataType::Type::kUint16:
2303     case DataType::Type::kInt16:
2304     case DataType::Type::kInt32: {
2305       uint32_t index = gp_index_++;
2306       stack_index_++;
2307       if (index < calling_convention.GetNumberOfRegisters()) {
2308         return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
2309       } else {
2310         return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
2311       }
2312     }
2313 
2314     case DataType::Type::kInt64: {
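      // A long occupies two 32-bit vreg slots in this calling convention, so the
      // stack index advances by two even when the value is passed in a register.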
2315       uint32_t index = gp_index_;
2316       stack_index_ += 2;
2317       if (index < calling_convention.GetNumberOfRegisters()) {
2318         gp_index_ += 1;
2319         return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
2320       } else {
2321         gp_index_ += 2;
2322         return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
2323       }
2324     }
2325 
2326     case DataType::Type::kFloat32: {
2327       uint32_t index = float_index_++;
2328       stack_index_++;
2329       if (index < calling_convention.GetNumberOfFpuRegisters()) {
2330         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
2331       } else {
2332         return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
2333       }
2334     }
2335 
2336     case DataType::Type::kFloat64: {
2337       uint32_t index = float_index_++;
2338       stack_index_ += 2;
2339       if (index < calling_convention.GetNumberOfFpuRegisters()) {
2340         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
2341       } else {
2342         return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
2343       }
2344     }
2345 
2346     case DataType::Type::kUint32:
2347     case DataType::Type::kUint64:
2348     case DataType::Type::kVoid:
2349       LOG(FATAL) << "Unexpected parameter type " << type;
2350       break;
2351   }
2352   return Location::NoLocation();
2353 }
2354 
2355 void LocationsBuilderX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2356   // The trampoline uses the same calling convention as the dex calling convention,
2357   // except that instead of loading arg0/r0 with the target Method*, arg0/r0 will
2358   // contain the method_idx.
2359   HandleInvoke(invoke);
2360 }
2361 
2362 void InstructionCodeGeneratorX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2363   codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
2364 }
2365 
2366 void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2367   // Explicit clinit checks triggered by static invokes must have been pruned by
2368   // art::PrepareForRegisterAllocation.
2369   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2370 
2371   IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
2372   if (intrinsic.TryDispatch(invoke)) {
2373     return;
2374   }
2375 
2376   HandleInvoke(invoke);
2377 }
2378 
2379 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
2380   if (invoke->GetLocations()->Intrinsified()) {
2381     IntrinsicCodeGeneratorX86_64 intrinsic(codegen);
2382     intrinsic.Dispatch(invoke);
2383     return true;
2384   }
2385   return false;
2386 }
2387 
2388 void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2389   // Explicit clinit checks triggered by static invokes must have been pruned by
2390   // art::PrepareForRegisterAllocation.
2391   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2392 
2393   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2394     return;
2395   }
2396 
2397   LocationSummary* locations = invoke->GetLocations();
2398   codegen_->GenerateStaticOrDirectCall(
2399       invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
2400 }
2401 
2402 void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) {
2403   InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
2404   CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
2405 }
2406 
2407 void LocationsBuilderX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2408   IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
2409   if (intrinsic.TryDispatch(invoke)) {
2410     return;
2411   }
2412 
2413   HandleInvoke(invoke);
2414 }
2415 
2416 void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2417   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2418     return;
2419   }
2420 
2421   codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
2422   DCHECK(!codegen_->IsLeafMethod());
2423 }
2424 
2425 void LocationsBuilderX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
2426   HandleInvoke(invoke);
2427   // Add the hidden argument.
2428   invoke->GetLocations()->AddTemp(Location::RegisterLocation(RAX));
2429 }
2430 
2431 void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
2432   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
2433   LocationSummary* locations = invoke->GetLocations();
2434   CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
2435   CpuRegister hidden_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
2436   Location receiver = locations->InAt(0);
2437   size_t class_offset = mirror::Object::ClassOffset().SizeValue();
2438 
2439   // Set the hidden argument. It is safe to do this here, as RAX
2440   // won't be modified thereafter, before the `call` instruction.
2441   DCHECK_EQ(RAX, hidden_reg.AsRegister());
2442   codegen_->Load64BitValue(hidden_reg, invoke->GetDexMethodIndex());
2443 
2444   if (receiver.IsStackSlot()) {
2445     __ movl(temp, Address(CpuRegister(RSP), receiver.GetStackIndex()));
2446     // /* HeapReference<Class> */ temp = temp->klass_
2447     __ movl(temp, Address(temp, class_offset));
2448   } else {
2449     // /* HeapReference<Class> */ temp = receiver->klass_
2450     __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset));
2451   }
2452   codegen_->MaybeRecordImplicitNullCheck(invoke);
2453   // Instead of simply (possibly) unpoisoning `temp` here, we should
2454   // emit a read barrier for the previous class reference load.
2455   // However, this is not required in practice: this is an
2456   // intermediate/temporary reference, and the current concurrent
2457   // copying collector keeps the from-space memory intact/accessible
2458   // until the end of the marking phase (future versions of the
2459   // collector may not preserve this property).
2460   __ MaybeUnpoisonHeapReference(temp);
2461   // temp = temp->GetAddressOfIMT()
2462   __ movq(temp,
2463       Address(temp, mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
2464   // temp = temp->GetImtEntryAt(method_offset);
2465   uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
2466       invoke->GetImtIndex(), kX86_64PointerSize));
2468   __ movq(temp, Address(temp, method_offset));
2469   // call temp->GetEntryPoint();
2470   __ call(Address(
2471       temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64PointerSize).SizeValue()));
2472 
2473   DCHECK(!codegen_->IsLeafMethod());
2474   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2475 }
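
// Illustrative sketch (not part of the code generator): a C++-style model of the
// IMT dispatch emitted above. The struct layout and the names `klass`, `imt`,
// `entries` and `entry_point` are hypothetical stand-ins for the real runtime
// offsets (ClassOffset, ImtPtrOffset, ImTable::OffsetOfElement and
// EntryPointFromQuickCompiledCodeOffset); the array size is illustrative only.
// RAX additionally carries the dex method index as the hidden argument, so that a
// conflict-resolution trampoline sitting in the IMT slot can locate the actual
// interface method.
struct SketchArtMethod { void (*entry_point)(); };
struct SketchImTable { SketchArtMethod* entries[64]; };  // Size chosen for illustration.
struct SketchClass { SketchImTable* imt; };
struct SketchObject { SketchClass* klass; };

inline void SketchInvokeInterface(SketchObject* receiver, uint32_t imt_index) {
  SketchClass* klass = receiver->klass;                // movl temp, [receiver + class_offset]
  SketchImTable* imt = klass->imt;                     // movq temp, [temp + ImtPtrOffset]
  SketchArtMethod* target = imt->entries[imt_index];   // movq temp, [temp + method_offset]
  target->entry_point();                               // call [temp + entry point offset]
}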
2476 
2477 void LocationsBuilderX86_64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2478   HandleInvoke(invoke);
2479 }
2480 
2481 void InstructionCodeGeneratorX86_64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2482   codegen_->GenerateInvokePolymorphicCall(invoke);
2483 }
2484 
2485 void LocationsBuilderX86_64::VisitNeg(HNeg* neg) {
2486   LocationSummary* locations =
2487       new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
2488   switch (neg->GetResultType()) {
2489     case DataType::Type::kInt32:
2490     case DataType::Type::kInt64:
2491       locations->SetInAt(0, Location::RequiresRegister());
2492       locations->SetOut(Location::SameAsFirstInput());
2493       break;
2494 
2495     case DataType::Type::kFloat32:
2496     case DataType::Type::kFloat64:
2497       locations->SetInAt(0, Location::RequiresFpuRegister());
2498       locations->SetOut(Location::SameAsFirstInput());
2499       locations->AddTemp(Location::RequiresFpuRegister());
2500       break;
2501 
2502     default:
2503       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2504   }
2505 }
2506 
2507 void InstructionCodeGeneratorX86_64::VisitNeg(HNeg* neg) {
2508   LocationSummary* locations = neg->GetLocations();
2509   Location out = locations->Out();
2510   Location in = locations->InAt(0);
2511   switch (neg->GetResultType()) {
2512     case DataType::Type::kInt32:
2513       DCHECK(in.IsRegister());
2514       DCHECK(in.Equals(out));
2515       __ negl(out.AsRegister<CpuRegister>());
2516       break;
2517 
2518     case DataType::Type::kInt64:
2519       DCHECK(in.IsRegister());
2520       DCHECK(in.Equals(out));
2521       __ negq(out.AsRegister<CpuRegister>());
2522       break;
2523 
2524     case DataType::Type::kFloat32: {
2525       DCHECK(in.Equals(out));
2526       XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2527       // Implement float negation with an exclusive or with value
2528       // 0x80000000 (mask for bit 31, representing the sign of a
2529       // single-precision floating-point number).
2530       __ movss(mask, codegen_->LiteralInt32Address(0x80000000));
2531       __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
2532       break;
2533     }
2534 
2535     case DataType::Type::kFloat64: {
2536       DCHECK(in.Equals(out));
2537       XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2538       // Implement double negation with an exclusive or with value
2539       // 0x8000000000000000 (mask for bit 63, representing the sign of
2540       // a double-precision floating-point number).
2541       __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000)));
2542       __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
2543       break;
2544     }
2545 
2546     default:
2547       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2548   }
2549 }
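
// Illustrative sketch (not part of the code generator): the sign-bit flip that
// the xorps/xorpd sequences above perform, written in portable C++ (assumes
// <cstring> is available for std::memcpy). Plain subtraction (0 - x) would be
// wrong here: it yields +0.0 for x == +0.0, whereas negation must produce -0.0.
inline float SketchNegateFloat(float value) {
  uint32_t bits;
  std::memcpy(&bits, &value, sizeof(bits));
  bits ^= UINT32_C(0x80000000);  // Flip bit 31, the single-precision sign bit.
  std::memcpy(&value, &bits, sizeof(value));
  return value;
}

inline double SketchNegateDouble(double value) {
  uint64_t bits;
  std::memcpy(&bits, &value, sizeof(bits));
  bits ^= UINT64_C(0x8000000000000000);  // Flip bit 63, the double-precision sign bit.
  std::memcpy(&value, &bits, sizeof(value));
  return value;
}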
2550 
2551 void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) {
2552   LocationSummary* locations =
2553       new (GetGraph()->GetAllocator()) LocationSummary(conversion, LocationSummary::kNoCall);
2554   DataType::Type result_type = conversion->GetResultType();
2555   DataType::Type input_type = conversion->GetInputType();
2556   DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
2557       << input_type << " -> " << result_type;
2558 
2559   switch (result_type) {
2560     case DataType::Type::kUint8:
2561     case DataType::Type::kInt8:
2562     case DataType::Type::kUint16:
2563     case DataType::Type::kInt16:
2564       DCHECK(DataType::IsIntegralType(input_type)) << input_type;
2565       locations->SetInAt(0, Location::Any());
2566       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2567       break;
2568 
2569     case DataType::Type::kInt32:
2570       switch (input_type) {
2571         case DataType::Type::kInt64:
2572           locations->SetInAt(0, Location::Any());
2573           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2574           break;
2575 
2576         case DataType::Type::kFloat32:
2577           locations->SetInAt(0, Location::RequiresFpuRegister());
2578           locations->SetOut(Location::RequiresRegister());
2579           break;
2580 
2581         case DataType::Type::kFloat64:
2582           locations->SetInAt(0, Location::RequiresFpuRegister());
2583           locations->SetOut(Location::RequiresRegister());
2584           break;
2585 
2586         default:
2587           LOG(FATAL) << "Unexpected type conversion from " << input_type
2588                      << " to " << result_type;
2589       }
2590       break;
2591 
2592     case DataType::Type::kInt64:
2593       switch (input_type) {
2594         case DataType::Type::kBool:
2595         case DataType::Type::kUint8:
2596         case DataType::Type::kInt8:
2597         case DataType::Type::kUint16:
2598         case DataType::Type::kInt16:
2599         case DataType::Type::kInt32:
2600           // TODO: We would benefit from a (to-be-implemented)
2601           // Location::RegisterOrStackSlot requirement for this input.
2602           locations->SetInAt(0, Location::RequiresRegister());
2603           locations->SetOut(Location::RequiresRegister());
2604           break;
2605 
2606         case DataType::Type::kFloat32:
2607           locations->SetInAt(0, Location::RequiresFpuRegister());
2608           locations->SetOut(Location::RequiresRegister());
2609           break;
2610 
2611         case DataType::Type::kFloat64:
2612           locations->SetInAt(0, Location::RequiresFpuRegister());
2613           locations->SetOut(Location::RequiresRegister());
2614           break;
2615 
2616         default:
2617           LOG(FATAL) << "Unexpected type conversion from " << input_type
2618                      << " to " << result_type;
2619       }
2620       break;
2621 
2622     case DataType::Type::kFloat32:
2623       switch (input_type) {
2624         case DataType::Type::kBool:
2625         case DataType::Type::kUint8:
2626         case DataType::Type::kInt8:
2627         case DataType::Type::kUint16:
2628         case DataType::Type::kInt16:
2629         case DataType::Type::kInt32:
2630           locations->SetInAt(0, Location::Any());
2631           locations->SetOut(Location::RequiresFpuRegister());
2632           break;
2633 
2634         case DataType::Type::kInt64:
2635           locations->SetInAt(0, Location::Any());
2636           locations->SetOut(Location::RequiresFpuRegister());
2637           break;
2638 
2639         case DataType::Type::kFloat64:
2640           locations->SetInAt(0, Location::Any());
2641           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2642           break;
2643 
2644         default:
2645           LOG(FATAL) << "Unexpected type conversion from " << input_type
2646                      << " to " << result_type;
2647       }
2648       break;
2649 
2650     case DataType::Type::kFloat64:
2651       switch (input_type) {
2652         case DataType::Type::kBool:
2653         case DataType::Type::kUint8:
2654         case DataType::Type::kInt8:
2655         case DataType::Type::kUint16:
2656         case DataType::Type::kInt16:
2657         case DataType::Type::kInt32:
2658           locations->SetInAt(0, Location::Any());
2659           locations->SetOut(Location::RequiresFpuRegister());
2660           break;
2661 
2662         case DataType::Type::kInt64:
2663           locations->SetInAt(0, Location::Any());
2664           locations->SetOut(Location::RequiresFpuRegister());
2665           break;
2666 
2667         case DataType::Type::kFloat32:
2668           locations->SetInAt(0, Location::Any());
2669           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2670           break;
2671 
2672         default:
2673           LOG(FATAL) << "Unexpected type conversion from " << input_type
2674                      << " to " << result_type;
2675       }
2676       break;
2677 
2678     default:
2679       LOG(FATAL) << "Unexpected type conversion from " << input_type
2680                  << " to " << result_type;
2681   }
2682 }
2683 
2684 void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conversion) {
2685   LocationSummary* locations = conversion->GetLocations();
2686   Location out = locations->Out();
2687   Location in = locations->InAt(0);
2688   DataType::Type result_type = conversion->GetResultType();
2689   DataType::Type input_type = conversion->GetInputType();
2690   DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
2691       << input_type << " -> " << result_type;
2692   switch (result_type) {
2693     case DataType::Type::kUint8:
2694       switch (input_type) {
2695         case DataType::Type::kInt8:
2696         case DataType::Type::kUint16:
2697         case DataType::Type::kInt16:
2698         case DataType::Type::kInt32:
2699         case DataType::Type::kInt64:
2700           if (in.IsRegister()) {
2701             __ movzxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2702           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
2703             __ movzxb(out.AsRegister<CpuRegister>(),
2704                       Address(CpuRegister(RSP), in.GetStackIndex()));
2705           } else {
2706             __ movl(out.AsRegister<CpuRegister>(),
2707                     Immediate(static_cast<uint8_t>(Int64FromConstant(in.GetConstant()))));
2708           }
2709           break;
2710 
2711         default:
2712           LOG(FATAL) << "Unexpected type conversion from " << input_type
2713                      << " to " << result_type;
2714       }
2715       break;
2716 
2717     case DataType::Type::kInt8:
2718       switch (input_type) {
2719         case DataType::Type::kUint8:
2720         case DataType::Type::kUint16:
2721         case DataType::Type::kInt16:
2722         case DataType::Type::kInt32:
2723         case DataType::Type::kInt64:
2724           if (in.IsRegister()) {
2725             __ movsxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2726           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
2727             __ movsxb(out.AsRegister<CpuRegister>(),
2728                       Address(CpuRegister(RSP), in.GetStackIndex()));
2729           } else {
2730             __ movl(out.AsRegister<CpuRegister>(),
2731                     Immediate(static_cast<int8_t>(Int64FromConstant(in.GetConstant()))));
2732           }
2733           break;
2734 
2735         default:
2736           LOG(FATAL) << "Unexpected type conversion from " << input_type
2737                      << " to " << result_type;
2738       }
2739       break;
2740 
2741     case DataType::Type::kUint16:
2742       switch (input_type) {
2743         case DataType::Type::kInt8:
2744         case DataType::Type::kInt16:
2745         case DataType::Type::kInt32:
2746         case DataType::Type::kInt64:
2747           if (in.IsRegister()) {
2748             __ movzxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2749           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
2750             __ movzxw(out.AsRegister<CpuRegister>(),
2751                       Address(CpuRegister(RSP), in.GetStackIndex()));
2752           } else {
2753             __ movl(out.AsRegister<CpuRegister>(),
2754                     Immediate(static_cast<uint16_t>(Int64FromConstant(in.GetConstant()))));
2755           }
2756           break;
2757 
2758         default:
2759           LOG(FATAL) << "Unexpected type conversion from " << input_type
2760                      << " to " << result_type;
2761       }
2762       break;
2763 
2764     case DataType::Type::kInt16:
2765       switch (input_type) {
2766         case DataType::Type::kUint16:
2767         case DataType::Type::kInt32:
2768         case DataType::Type::kInt64:
2769           if (in.IsRegister()) {
2770             __ movsxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2771           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
2772             __ movsxw(out.AsRegister<CpuRegister>(),
2773                       Address(CpuRegister(RSP), in.GetStackIndex()));
2774           } else {
2775             __ movl(out.AsRegister<CpuRegister>(),
2776                     Immediate(static_cast<int16_t>(Int64FromConstant(in.GetConstant()))));
2777           }
2778           break;
2779 
2780         default:
2781           LOG(FATAL) << "Unexpected type conversion from " << input_type
2782                      << " to " << result_type;
2783       }
2784       break;
2785 
2786     case DataType::Type::kInt32:
2787       switch (input_type) {
2788         case DataType::Type::kInt64:
2789           if (in.IsRegister()) {
2790             __ movl(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2791           } else if (in.IsDoubleStackSlot()) {
2792             __ movl(out.AsRegister<CpuRegister>(),
2793                     Address(CpuRegister(RSP), in.GetStackIndex()));
2794           } else {
2795             DCHECK(in.IsConstant());
2796             DCHECK(in.GetConstant()->IsLongConstant());
2797             int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
2798             __ movl(out.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
2799           }
2800           break;
2801 
2802         case DataType::Type::kFloat32: {
2803           XmmRegister input = in.AsFpuRegister<XmmRegister>();
2804           CpuRegister output = out.AsRegister<CpuRegister>();
2805           NearLabel done, nan;
2806 
2807           __ movl(output, Immediate(kPrimIntMax));
2808           // if input >= (float)INT_MAX goto done
2809           __ comiss(input, codegen_->LiteralFloatAddress(kPrimIntMax));
2810           __ j(kAboveEqual, &done);
2811           // if input == NaN goto nan
2812           __ j(kUnordered, &nan);
2813           // output = float-to-int-truncate(input)
2814           __ cvttss2si(output, input, false);
2815           __ jmp(&done);
2816           __ Bind(&nan);
2817           //  output = 0
2818           __ xorl(output, output);
2819           __ Bind(&done);
2820           break;
2821         }
2822 
2823         case DataType::Type::kFloat64: {
2824           XmmRegister input = in.AsFpuRegister<XmmRegister>();
2825           CpuRegister output = out.AsRegister<CpuRegister>();
2826           NearLabel done, nan;
2827 
2828           __ movl(output, Immediate(kPrimIntMax));
2829           // if input >= (double)INT_MAX goto done
2830           __ comisd(input, codegen_->LiteralDoubleAddress(kPrimIntMax));
2831           __ j(kAboveEqual, &done);
2832           // if input == NaN goto nan
2833           __ j(kUnordered, &nan);
2834           // output = double-to-int-truncate(input)
2835           __ cvttsd2si(output, input);
2836           __ jmp(&done);
2837           __ Bind(&nan);
2838           //  output = 0
2839           __ xorl(output, output);
2840           __ Bind(&done);
2841           break;
2842         }
2843 
2844         default:
2845           LOG(FATAL) << "Unexpected type conversion from " << input_type
2846                      << " to " << result_type;
2847       }
2848       break;
2849 
2850     case DataType::Type::kInt64:
2851       DCHECK(out.IsRegister());
2852       switch (input_type) {
2853         case DataType::Type::kBool:
2854         case DataType::Type::kUint8:
2855         case DataType::Type::kInt8:
2856         case DataType::Type::kUint16:
2857         case DataType::Type::kInt16:
2858         case DataType::Type::kInt32:
2859           DCHECK(in.IsRegister());
2860           __ movsxd(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2861           break;
2862 
2863         case DataType::Type::kFloat32: {
2864           XmmRegister input = in.AsFpuRegister<XmmRegister>();
2865           CpuRegister output = out.AsRegister<CpuRegister>();
2866           NearLabel done, nan;
2867 
2868           codegen_->Load64BitValue(output, kPrimLongMax);
2869           // if input >= (float)LONG_MAX goto done
2870           __ comiss(input, codegen_->LiteralFloatAddress(kPrimLongMax));
2871           __ j(kAboveEqual, &done);
2872           // if input == NaN goto nan
2873           __ j(kUnordered, &nan);
2874           // output = float-to-long-truncate(input)
2875           __ cvttss2si(output, input, true);
2876           __ jmp(&done);
2877           __ Bind(&nan);
2878           //  output = 0
2879           __ xorl(output, output);
2880           __ Bind(&done);
2881           break;
2882         }
2883 
2884         case DataType::Type::kFloat64: {
2885           XmmRegister input = in.AsFpuRegister<XmmRegister>();
2886           CpuRegister output = out.AsRegister<CpuRegister>();
2887           NearLabel done, nan;
2888 
2889           codegen_->Load64BitValue(output, kPrimLongMax);
2890           // if input >= (double)LONG_MAX goto done
2891           __ comisd(input, codegen_->LiteralDoubleAddress(kPrimLongMax));
2892           __ j(kAboveEqual, &done);
2893           // if input == NaN goto nan
2894           __ j(kUnordered, &nan);
2895           // output = double-to-long-truncate(input)
2896           __ cvttsd2si(output, input, true);
2897           __ jmp(&done);
2898           __ Bind(&nan);
2899           //  output = 0
2900           __ xorl(output, output);
2901           __ Bind(&done);
2902           break;
2903         }
2904 
2905         default:
2906           LOG(FATAL) << "Unexpected type conversion from " << input_type
2907                      << " to " << result_type;
2908       }
2909       break;
2910 
2911     case DataType::Type::kFloat32:
2912       switch (input_type) {
2913         case DataType::Type::kBool:
2914         case DataType::Type::kUint8:
2915         case DataType::Type::kInt8:
2916         case DataType::Type::kUint16:
2917         case DataType::Type::kInt16:
2918         case DataType::Type::kInt32:
2919           if (in.IsRegister()) {
2920             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
2921           } else if (in.IsConstant()) {
2922             int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
2923             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
2924             codegen_->Load32BitValue(dest, static_cast<float>(v));
2925           } else {
2926             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
2927                         Address(CpuRegister(RSP), in.GetStackIndex()), false);
2928           }
2929           break;
2930 
2931         case DataType::Type::kInt64:
2932           if (in.IsRegister()) {
2933             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
2934           } else if (in.IsConstant()) {
2935             int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
2936             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
2937             codegen_->Load32BitValue(dest, static_cast<float>(v));
2938           } else {
2939             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
2940                         Address(CpuRegister(RSP), in.GetStackIndex()), true);
2941           }
2942           break;
2943 
2944         case DataType::Type::kFloat64:
2945           if (in.IsFpuRegister()) {
2946             __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
2947           } else if (in.IsConstant()) {
2948             double v = in.GetConstant()->AsDoubleConstant()->GetValue();
2949             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
2950             codegen_->Load32BitValue(dest, static_cast<float>(v));
2951           } else {
2952             __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(),
2953                         Address(CpuRegister(RSP), in.GetStackIndex()));
2954           }
2955           break;
2956 
2957         default:
2958           LOG(FATAL) << "Unexpected type conversion from " << input_type
2959                      << " to " << result_type;
2960       }
2961       break;
2962 
2963     case DataType::Type::kFloat64:
2964       switch (input_type) {
2965         case DataType::Type::kBool:
2966         case DataType::Type::kUint8:
2967         case DataType::Type::kInt8:
2968         case DataType::Type::kUint16:
2969         case DataType::Type::kInt16:
2970         case DataType::Type::kInt32:
2971           if (in.IsRegister()) {
2972             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
2973           } else if (in.IsConstant()) {
2974             int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
2975             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
2976             codegen_->Load64BitValue(dest, static_cast<double>(v));
2977           } else {
2978             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
2979                         Address(CpuRegister(RSP), in.GetStackIndex()), false);
2980           }
2981           break;
2982 
2983         case DataType::Type::kInt64:
2984           if (in.IsRegister()) {
2985             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
2986           } else if (in.IsConstant()) {
2987             int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
2988             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
2989             codegen_->Load64BitValue(dest, static_cast<double>(v));
2990           } else {
2991             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
2992                         Address(CpuRegister(RSP), in.GetStackIndex()), true);
2993           }
2994           break;
2995 
2996         case DataType::Type::kFloat32:
2997           if (in.IsFpuRegister()) {
2998             __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
2999           } else if (in.IsConstant()) {
3000             float v = in.GetConstant()->AsFloatConstant()->GetValue();
3001             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3002             codegen_->Load64BitValue(dest, static_cast<double>(v));
3003           } else {
3004             __ cvtss2sd(out.AsFpuRegister<XmmRegister>(),
3005                         Address(CpuRegister(RSP), in.GetStackIndex()));
3006           }
3007           break;
3008 
3009         default:
3010           LOG(FATAL) << "Unexpected type conversion from " << input_type
3011                      << " to " << result_type;
3012       }
3013       break;
3014 
3015     default:
3016       LOG(FATAL) << "Unexpected type conversion from " << input_type
3017                  << " to " << result_type;
3018   }
3019 }
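
// Illustrative sketch (not part of the code generator): the Java conversion
// semantics that the comiss/j/cvttss2si sequence above implements for
// float -> int. Bare cvttss2si returns the "integer indefinite" value
// 0x80000000 for NaN and out-of-range inputs, so the emitted code pre-loads
// INT_MAX and branches around the conversion; the lower bound needs no explicit
// branch because 0x80000000 happens to equal INT32_MIN.
inline int32_t SketchFloatToInt(float input) {
  if (input != input) {                          // NaN: the kUnordered branch to `nan`.
    return 0;
  }
  if (input >= static_cast<float>(INT32_MAX)) {  // The kAboveEqual branch to `done`.
    return INT32_MAX;
  }
  if (input <= static_cast<float>(INT32_MIN)) {  // Handled "for free" by cvttss2si.
    return INT32_MIN;
  }
  return static_cast<int32_t>(input);            // cvttss2si: truncate toward zero.
}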
3020 
3021 void LocationsBuilderX86_64::VisitAdd(HAdd* add) {
3022   LocationSummary* locations =
3023       new (GetGraph()->GetAllocator()) LocationSummary(add, LocationSummary::kNoCall);
3024   switch (add->GetResultType()) {
3025     case DataType::Type::kInt32: {
3026       locations->SetInAt(0, Location::RequiresRegister());
3027       locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
3028       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3029       break;
3030     }
3031 
3032     case DataType::Type::kInt64: {
3033       locations->SetInAt(0, Location::RequiresRegister());
3034       // We can use a leaq or addq if the constant can fit in an immediate.
3035       locations->SetInAt(1, Location::RegisterOrInt32Constant(add->InputAt(1)));
3036       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3037       break;
3038     }
3039 
3040     case DataType::Type::kFloat64:
3041     case DataType::Type::kFloat32: {
3042       locations->SetInAt(0, Location::RequiresFpuRegister());
3043       locations->SetInAt(1, Location::Any());
3044       locations->SetOut(Location::SameAsFirstInput());
3045       break;
3046     }
3047 
3048     default:
3049       LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3050   }
3051 }
3052 
3053 void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) {
3054   LocationSummary* locations = add->GetLocations();
3055   Location first = locations->InAt(0);
3056   Location second = locations->InAt(1);
3057   Location out = locations->Out();
3058 
3059   switch (add->GetResultType()) {
3060     case DataType::Type::kInt32: {
3061       if (second.IsRegister()) {
3062         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3063           __ addl(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3064         } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3065           __ addl(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
3066         } else {
3067           __ leal(out.AsRegister<CpuRegister>(), Address(
3068               first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
3069         }
3070       } else if (second.IsConstant()) {
3071         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3072           __ addl(out.AsRegister<CpuRegister>(),
3073                   Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
3074         } else {
3075           __ leal(out.AsRegister<CpuRegister>(), Address(
3076               first.AsRegister<CpuRegister>(), second.GetConstant()->AsIntConstant()->GetValue()));
3077         }
3078       } else {
3079         DCHECK(first.Equals(locations->Out()));
3080         __ addl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
3081       }
3082       break;
3083     }
3084 
3085     case DataType::Type::kInt64: {
3086       if (second.IsRegister()) {
3087         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3088           __ addq(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3089         } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3090           __ addq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
3091         } else {
3092           __ leaq(out.AsRegister<CpuRegister>(), Address(
3093               first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
3094         }
3095       } else {
3096         DCHECK(second.IsConstant());
3097         int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3098         int32_t int32_value = Low32Bits(value);
3099         DCHECK_EQ(int32_value, value);
3100         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3101           __ addq(out.AsRegister<CpuRegister>(), Immediate(int32_value));
3102         } else {
3103           __ leaq(out.AsRegister<CpuRegister>(), Address(
3104               first.AsRegister<CpuRegister>(), int32_value));
3105         }
3106       }
3107       break;
3108     }
3109 
3110     case DataType::Type::kFloat32: {
3111       if (second.IsFpuRegister()) {
3112         __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3113       } else if (second.IsConstant()) {
3114         __ addss(first.AsFpuRegister<XmmRegister>(),
3115                  codegen_->LiteralFloatAddress(
3116                      second.GetConstant()->AsFloatConstant()->GetValue()));
3117       } else {
3118         DCHECK(second.IsStackSlot());
3119         __ addss(first.AsFpuRegister<XmmRegister>(),
3120                  Address(CpuRegister(RSP), second.GetStackIndex()));
3121       }
3122       break;
3123     }
3124 
3125     case DataType::Type::kFloat64: {
3126       if (second.IsFpuRegister()) {
3127         __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3128       } else if (second.IsConstant()) {
3129         __ addsd(first.AsFpuRegister<XmmRegister>(),
3130                  codegen_->LiteralDoubleAddress(
3131                      second.GetConstant()->AsDoubleConstant()->GetValue()));
3132       } else {
3133         DCHECK(second.IsDoubleStackSlot());
3134         __ addsd(first.AsFpuRegister<XmmRegister>(),
3135                  Address(CpuRegister(RSP), second.GetStackIndex()));
3136       }
3137       break;
3138     }
3139 
3140     default:
3141       LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3142   }
3143 }
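
// Illustrative sketch (not part of the code generator): why the leal/leaq forms
// are used above when the output register differs from both inputs. `lea` is a
// pure address computation, so it can write first + second (or first + imm32)
// into a third register without clobbering either input, effectively acting as a
// three-operand add; the two-operand addl/addq suffices when the output aliases
// one of the inputs or when the second operand lives on the stack.
inline int32_t SketchLeaAdd(int32_t first, int32_t second) {
  return first + second;  // leal out, [first + second * 1 + 0]
}
inline int64_t SketchLeaAddImm32(int64_t first, int32_t imm) {
  return first + imm;     // leaq out, [first + imm]
}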
3144 
3145 void LocationsBuilderX86_64::VisitSub(HSub* sub) {
3146   LocationSummary* locations =
3147       new (GetGraph()->GetAllocator()) LocationSummary(sub, LocationSummary::kNoCall);
3148   switch (sub->GetResultType()) {
3149     case DataType::Type::kInt32: {
3150       locations->SetInAt(0, Location::RequiresRegister());
3151       locations->SetInAt(1, Location::Any());
3152       locations->SetOut(Location::SameAsFirstInput());
3153       break;
3154     }
3155     case DataType::Type::kInt64: {
3156       locations->SetInAt(0, Location::RequiresRegister());
3157       locations->SetInAt(1, Location::RegisterOrInt32Constant(sub->InputAt(1)));
3158       locations->SetOut(Location::SameAsFirstInput());
3159       break;
3160     }
3161     case DataType::Type::kFloat32:
3162     case DataType::Type::kFloat64: {
3163       locations->SetInAt(0, Location::RequiresFpuRegister());
3164       locations->SetInAt(1, Location::Any());
3165       locations->SetOut(Location::SameAsFirstInput());
3166       break;
3167     }
3168     default:
3169       LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3170   }
3171 }
3172 
3173 void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) {
3174   LocationSummary* locations = sub->GetLocations();
3175   Location first = locations->InAt(0);
3176   Location second = locations->InAt(1);
3177   DCHECK(first.Equals(locations->Out()));
3178   switch (sub->GetResultType()) {
3179     case DataType::Type::kInt32: {
3180       if (second.IsRegister()) {
3181         __ subl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3182       } else if (second.IsConstant()) {
3183         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
3184         __ subl(first.AsRegister<CpuRegister>(), imm);
3185       } else {
3186         __ subl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
3187       }
3188       break;
3189     }
3190     case DataType::Type::kInt64: {
3191       if (second.IsConstant()) {
3192         int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3193         DCHECK(IsInt<32>(value));
3194         __ subq(first.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
3195       } else {
3196         __ subq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3197       }
3198       break;
3199     }
3200 
3201     case DataType::Type::kFloat32: {
3202       if (second.IsFpuRegister()) {
3203         __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3204       } else if (second.IsConstant()) {
3205         __ subss(first.AsFpuRegister<XmmRegister>(),
3206                  codegen_->LiteralFloatAddress(
3207                      second.GetConstant()->AsFloatConstant()->GetValue()));
3208       } else {
3209         DCHECK(second.IsStackSlot());
3210         __ subss(first.AsFpuRegister<XmmRegister>(),
3211                  Address(CpuRegister(RSP), second.GetStackIndex()));
3212       }
3213       break;
3214     }
3215 
3216     case DataType::Type::kFloat64: {
3217       if (second.IsFpuRegister()) {
3218         __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3219       } else if (second.IsConstant()) {
3220         __ subsd(first.AsFpuRegister<XmmRegister>(),
3221                  codegen_->LiteralDoubleAddress(
3222                      second.GetConstant()->AsDoubleConstant()->GetValue()));
3223       } else {
3224         DCHECK(second.IsDoubleStackSlot());
3225         __ subsd(first.AsFpuRegister<XmmRegister>(),
3226                  Address(CpuRegister(RSP), second.GetStackIndex()));
3227       }
3228       break;
3229     }
3230 
3231     default:
3232       LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3233   }
3234 }
3235 
3236 void LocationsBuilderX86_64::VisitMul(HMul* mul) {
3237   LocationSummary* locations =
3238       new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
3239   switch (mul->GetResultType()) {
3240     case DataType::Type::kInt32: {
3241       locations->SetInAt(0, Location::RequiresRegister());
3242       locations->SetInAt(1, Location::Any());
3243       if (mul->InputAt(1)->IsIntConstant()) {
3244         // Can use 3 operand multiply.
3245         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3246       } else {
3247         locations->SetOut(Location::SameAsFirstInput());
3248       }
3249       break;
3250     }
3251     case DataType::Type::kInt64: {
3252       locations->SetInAt(0, Location::RequiresRegister());
3253       locations->SetInAt(1, Location::Any());
3254       if (mul->InputAt(1)->IsLongConstant() &&
3255           IsInt<32>(mul->InputAt(1)->AsLongConstant()->GetValue())) {
3256         // Can use 3 operand multiply.
3257         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3258       } else {
3259         locations->SetOut(Location::SameAsFirstInput());
3260       }
3261       break;
3262     }
3263     case DataType::Type::kFloat32:
3264     case DataType::Type::kFloat64: {
3265       locations->SetInAt(0, Location::RequiresFpuRegister());
3266       locations->SetInAt(1, Location::Any());
3267       locations->SetOut(Location::SameAsFirstInput());
3268       break;
3269     }
3270 
3271     default:
3272       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3273   }
3274 }
3275 
3276 void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) {
3277   LocationSummary* locations = mul->GetLocations();
3278   Location first = locations->InAt(0);
3279   Location second = locations->InAt(1);
3280   Location out = locations->Out();
3281   switch (mul->GetResultType()) {
3282     case DataType::Type::kInt32:
3283       // The constant may have ended up in a register, so test explicitly to avoid
3284       // problems where the output may not be the same as the first operand.
3285       if (mul->InputAt(1)->IsIntConstant()) {
3286         Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
3287         __ imull(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), imm);
3288       } else if (second.IsRegister()) {
3289         DCHECK(first.Equals(out));
3290         __ imull(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3291       } else {
3292         DCHECK(first.Equals(out));
3293         DCHECK(second.IsStackSlot());
3294         __ imull(first.AsRegister<CpuRegister>(),
3295                  Address(CpuRegister(RSP), second.GetStackIndex()));
3296       }
3297       break;
3298     case DataType::Type::kInt64: {
3299       // The constant may have ended up in a register, so test explicitly to avoid
3300       // problems where the output may not be the same as the first operand.
3301       if (mul->InputAt(1)->IsLongConstant()) {
3302         int64_t value = mul->InputAt(1)->AsLongConstant()->GetValue();
3303         if (IsInt<32>(value)) {
3304           __ imulq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(),
3305                    Immediate(static_cast<int32_t>(value)));
3306         } else {
3307           // Have to use the constant area.
3308           DCHECK(first.Equals(out));
3309           __ imulq(first.AsRegister<CpuRegister>(), codegen_->LiteralInt64Address(value));
3310         }
3311       } else if (second.IsRegister()) {
3312         DCHECK(first.Equals(out));
3313         __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3314       } else {
3315         DCHECK(second.IsDoubleStackSlot());
3316         DCHECK(first.Equals(out));
3317         __ imulq(first.AsRegister<CpuRegister>(),
3318                  Address(CpuRegister(RSP), second.GetStackIndex()));
3319       }
3320       break;
3321     }
3322 
3323     case DataType::Type::kFloat32: {
3324       DCHECK(first.Equals(out));
3325       if (second.IsFpuRegister()) {
3326         __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3327       } else if (second.IsConstant()) {
3328         __ mulss(first.AsFpuRegister<XmmRegister>(),
3329                  codegen_->LiteralFloatAddress(
3330                      second.GetConstant()->AsFloatConstant()->GetValue()));
3331       } else {
3332         DCHECK(second.IsStackSlot());
3333         __ mulss(first.AsFpuRegister<XmmRegister>(),
3334                  Address(CpuRegister(RSP), second.GetStackIndex()));
3335       }
3336       break;
3337     }
3338 
3339     case DataType::Type::kFloat64: {
3340       DCHECK(first.Equals(out));
3341       if (second.IsFpuRegister()) {
3342         __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3343       } else if (second.IsConstant()) {
3344         __ mulsd(first.AsFpuRegister<XmmRegister>(),
3345                  codegen_->LiteralDoubleAddress(
3346                      second.GetConstant()->AsDoubleConstant()->GetValue()));
3347       } else {
3348         DCHECK(second.IsDoubleStackSlot());
3349         __ mulsd(first.AsFpuRegister<XmmRegister>(),
3350                  Address(CpuRegister(RSP), second.GetStackIndex()));
3351       }
3352       break;
3353     }
3354 
3355     default:
3356       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3357   }
3358 }
3359 
3360 void InstructionCodeGeneratorX86_64::PushOntoFPStack(Location source, uint32_t temp_offset,
3361                                                      uint32_t stack_adjustment, bool is_float) {
3362   if (source.IsStackSlot()) {
3363     DCHECK(is_float);
3364     __ flds(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
3365   } else if (source.IsDoubleStackSlot()) {
3366     DCHECK(!is_float);
3367     __ fldl(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
3368   } else {
3369     // Write the value to the temporary location on the stack and load to FP stack.
3370     if (is_float) {
3371       Location stack_temp = Location::StackSlot(temp_offset);
3372       codegen_->Move(stack_temp, source);
3373       __ flds(Address(CpuRegister(RSP), temp_offset));
3374     } else {
3375       Location stack_temp = Location::DoubleStackSlot(temp_offset);
3376       codegen_->Move(stack_temp, source);
3377       __ fldl(Address(CpuRegister(RSP), temp_offset));
3378     }
3379   }
3380 }
3381 
3382 void InstructionCodeGeneratorX86_64::GenerateRemFP(HRem* rem) {
3383   DataType::Type type = rem->GetResultType();
3384   bool is_float = type == DataType::Type::kFloat32;
3385   size_t elem_size = DataType::Size(type);
3386   LocationSummary* locations = rem->GetLocations();
3387   Location first = locations->InAt(0);
3388   Location second = locations->InAt(1);
3389   Location out = locations->Out();
3390 
3391   // Create stack space for 2 elements.
3392   // TODO: enhance register allocator to ask for stack temporaries.
3393   __ subq(CpuRegister(RSP), Immediate(2 * elem_size));
3394 
3395   // Load the values to the FP stack in reverse order, using temporaries if needed.
3396   PushOntoFPStack(second, elem_size, 2 * elem_size, is_float);
3397   PushOntoFPStack(first, 0, 2 * elem_size, is_float);
3398 
3399   // Loop doing FPREM until we stabilize.
3400   NearLabel retry;
3401   __ Bind(&retry);
3402   __ fprem();
3403 
3404   // Move FP status to AX.
3405   __ fstsw();
3406 
3407   // And see if the argument reduction is complete. This is signaled by the
3408   // C2 FPU status flag being cleared to 0.
3409   __ andl(CpuRegister(RAX), Immediate(kC2ConditionMask));
3410   __ j(kNotEqual, &retry);
3411 
3412   // We have settled on the final value. Retrieve it into an XMM register.
3413   // Store FP top of stack to real stack.
3414   if (is_float) {
3415     __ fsts(Address(CpuRegister(RSP), 0));
3416   } else {
3417     __ fstl(Address(CpuRegister(RSP), 0));
3418   }
3419 
3420   // Pop the 2 items from the FP stack.
3421   __ fucompp();
3422 
3423   // Load the value from the stack into an XMM register.
3424   DCHECK(out.IsFpuRegister()) << out;
3425   if (is_float) {
3426     __ movss(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
3427   } else {
3428     __ movsd(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
3429   }
3430 
3431   // And remove the temporary stack space we allocated.
3432   __ addq(CpuRegister(RSP), Immediate(2 * elem_size));
3433 }
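
// Illustrative sketch (not part of the code generator): the value computed by
// the fprem loop above. fprem performs a *partial* remainder, reducing the
// exponent difference by at most 63 bits per execution, which is why the code
// loops until the C2 status flag is clear. The fixed point of that reduction is
// the truncated-quotient remainder, i.e. fmod semantics, which is what Java's %
// requires for float/double (assumes <cmath> is available for std::fmod).
inline double SketchRemFP(double dividend, double divisor) {
  return std::fmod(dividend, divisor);  // dividend - trunc(dividend / divisor) * divisor
}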
3434 
3435 void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
3436   DCHECK(instruction->IsDiv() || instruction->IsRem());
3437 
3438   LocationSummary* locations = instruction->GetLocations();
3439   Location second = locations->InAt(1);
3440   DCHECK(second.IsConstant());
3441 
3442   CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
3443   CpuRegister input_register = locations->InAt(0).AsRegister<CpuRegister>();
3444   int64_t imm = Int64FromConstant(second.GetConstant());
3445 
3446   DCHECK(imm == 1 || imm == -1);
3447 
3448   switch (instruction->GetResultType()) {
3449     case DataType::Type::kInt32: {
3450       if (instruction->IsRem()) {
3451         __ xorl(output_register, output_register);
3452       } else {
3453         __ movl(output_register, input_register);
3454         if (imm == -1) {
3455           __ negl(output_register);
3456         }
3457       }
3458       break;
3459     }
3460 
3461     case DataType::Type::kInt64: {
3462       if (instruction->IsRem()) {
3463         __ xorl(output_register, output_register);
3464       } else {
3465         __ movq(output_register, input_register);
3466         if (imm == -1) {
3467           __ negq(output_register);
3468         }
3469       }
3470       break;
3471     }
3472 
3473     default:
3474       LOG(FATAL) << "Unexpected type for div by (-)1 " << instruction->GetResultType();
3475   }
3476 }
3477 
3478 void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
3479   LocationSummary* locations = instruction->GetLocations();
3480   Location second = locations->InAt(1);
3481 
3482   CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
3483   CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
3484 
3485   int64_t imm = Int64FromConstant(second.GetConstant());
3486   DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
3487   uint64_t abs_imm = AbsOrMin(imm);
3488 
3489   CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
3490 
3491   if (instruction->GetResultType() == DataType::Type::kInt32) {
3492     __ leal(tmp, Address(numerator, abs_imm - 1));
3493     __ testl(numerator, numerator);
3494     __ cmov(kGreaterEqual, tmp, numerator);
3495     int shift = CTZ(imm);
3496     __ sarl(tmp, Immediate(shift));
3497 
3498     if (imm < 0) {
3499       __ negl(tmp);
3500     }
3501 
3502     __ movl(output_register, tmp);
3503   } else {
3504     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
3505     CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
3506 
3507     codegen_->Load64BitValue(rdx, abs_imm - 1);
3508     __ addq(rdx, numerator);
3509     __ testq(numerator, numerator);
3510     __ cmov(kGreaterEqual, rdx, numerator);
3511     int shift = CTZ(imm);
3512     __ sarq(rdx, Immediate(shift));
3513 
3514     if (imm < 0) {
3515       __ negq(rdx);
3516     }
3517 
3518     __ movq(output_register, rdx);
3519   }
3520 }
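
// Illustrative sketch (not part of the code generator): the round-toward-zero
// power-of-two division that the leal/testl/cmov/sarl sequence above implements
// (32-bit case). A bare arithmetic shift rounds toward negative infinity, so
// negative numerators are biased by |divisor| - 1 first. __builtin_ctz stands in
// for CTZ, arithmetic right shift of negative values is assumed (as on the
// targeted compilers), and divisors of +/-1 never reach this path (they are
// handled by DivRemOneOrMinusOne).
inline int32_t SketchDivByPowerOfTwo(int32_t numerator, int32_t divisor) {
  uint32_t abs_divisor = static_cast<uint32_t>(
      divisor < 0 ? -static_cast<int64_t>(divisor) : divisor);
  int shift = __builtin_ctz(abs_divisor);
  // cmov picks the unbiased numerator when it is non-negative.
  int32_t biased =
      numerator < 0 ? numerator + static_cast<int32_t>(abs_divisor - 1) : numerator;
  int32_t quotient = biased >> shift;
  return divisor < 0 ? -quotient : quotient;
}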
3521 
3522 void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
3523   DCHECK(instruction->IsDiv() || instruction->IsRem());
3524 
3525   LocationSummary* locations = instruction->GetLocations();
3526   Location second = locations->InAt(1);
3527 
3528   CpuRegister numerator = instruction->IsDiv() ? locations->GetTemp(1).AsRegister<CpuRegister>()
3529       : locations->GetTemp(0).AsRegister<CpuRegister>();
3530   CpuRegister eax = locations->InAt(0).AsRegister<CpuRegister>();
3531   CpuRegister edx = instruction->IsDiv() ? locations->GetTemp(0).AsRegister<CpuRegister>()
3532       : locations->Out().AsRegister<CpuRegister>();
3533   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3534 
3535   DCHECK_EQ(RAX, eax.AsRegister());
3536   DCHECK_EQ(RDX, edx.AsRegister());
3537   if (instruction->IsDiv()) {
3538     DCHECK_EQ(RAX, out.AsRegister());
3539   } else {
3540     DCHECK_EQ(RDX, out.AsRegister());
3541   }
3542 
3543   int64_t magic;
3544   int shift;
3545 
3546   // TODO: can these branches be written as one?
3547   if (instruction->GetResultType() == DataType::Type::kInt32) {
3548     int imm = second.GetConstant()->AsIntConstant()->GetValue();
3549 
3550     CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift);
3551 
3552     __ movl(numerator, eax);
3553 
3554     __ movl(eax, Immediate(magic));
3555     __ imull(numerator);
3556 
3557     if (imm > 0 && magic < 0) {
3558       __ addl(edx, numerator);
3559     } else if (imm < 0 && magic > 0) {
3560       __ subl(edx, numerator);
3561     }
3562 
3563     if (shift != 0) {
3564       __ sarl(edx, Immediate(shift));
3565     }
3566 
3567     __ movl(eax, edx);
3568     __ shrl(edx, Immediate(31));
3569     __ addl(edx, eax);
3570 
3571     if (instruction->IsRem()) {
3572       __ movl(eax, numerator);
3573       __ imull(edx, Immediate(imm));
3574       __ subl(eax, edx);
3575       __ movl(edx, eax);
3576     } else {
3577       __ movl(eax, edx);
3578     }
3579   } else {
3580     int64_t imm = second.GetConstant()->AsLongConstant()->GetValue();
3581 
3582     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
3583 
3584     CpuRegister rax = eax;
3585     CpuRegister rdx = edx;
3586 
3587     CalculateMagicAndShiftForDivRem(imm, true /* is_long */, &magic, &shift);
3588 
3589     // Save the numerator.
3590     __ movq(numerator, rax);
3591 
3592     // RAX = magic
3593     codegen_->Load64BitValue(rax, magic);
3594 
3595     // RDX:RAX = magic * numerator
3596     __ imulq(numerator);
3597 
3598     if (imm > 0 && magic < 0) {
3599       // RDX += numerator
3600       __ addq(rdx, numerator);
3601     } else if (imm < 0 && magic > 0) {
3602       // RDX -= numerator
3603       __ subq(rdx, numerator);
3604     }
3605 
3606     // Shift if needed.
3607     if (shift != 0) {
3608       __ sarq(rdx, Immediate(shift));
3609     }
3610 
3611     // RDX += 1 if RDX < 0
3612     __ movq(rax, rdx);
3613     __ shrq(rdx, Immediate(63));
3614     __ addq(rdx, rax);
3615 
3616     if (instruction->IsRem()) {
3617       __ movq(rax, numerator);
3618 
3619       if (IsInt<32>(imm)) {
3620         __ imulq(rdx, Immediate(static_cast<int32_t>(imm)));
3621       } else {
3622         __ imulq(rdx, codegen_->LiteralInt64Address(imm));
3623       }
3624 
3625       __ subq(rax, rdx);
3626       __ movq(rdx, rax);
3627     } else {
3628       __ movq(rax, rdx);
3629     }
3630   }
3631 }
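
// Illustrative sketch (not part of the code generator): the 32-bit "magic
// number" division emitted above, given the (magic, shift) pair that
// CalculateMagicAndShiftForDivRem produces for the constant divisor. The 64-bit
// path has the same structure, using the high half of a 128-bit product.
// An arithmetic right shift of negative values is assumed.
inline int32_t SketchMagicDiv(int32_t numerator, int32_t divisor, int32_t magic, int shift) {
  // imull: EDX:EAX = magic * numerator; the algorithm keeps the high 32 bits (EDX).
  int64_t product = static_cast<int64_t>(magic) * numerator;
  int32_t high = static_cast<int32_t>(product >> 32);
  // Sign corrections, matching the conditional addl/subl on EDX above.
  if (divisor > 0 && magic < 0) high += numerator;
  if (divisor < 0 && magic > 0) high -= numerator;
  // sarl by `shift`, then add 1 when the intermediate quotient is negative
  // (the movl/shrl(31)/addl triple) so that the result rounds toward zero.
  high >>= shift;
  return high + static_cast<int32_t>(static_cast<uint32_t>(high) >> 31);
}
// The remainder variant recomputes numerator - quotient * divisor, as in the
// IsRem() branch above.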
3632 
3633 void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
3634   DCHECK(instruction->IsDiv() || instruction->IsRem());
3635   DataType::Type type = instruction->GetResultType();
3636   DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
3637 
3638   bool is_div = instruction->IsDiv();
3639   LocationSummary* locations = instruction->GetLocations();
3640 
3641   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3642   Location second = locations->InAt(1);
3643 
3644   DCHECK_EQ(RAX, locations->InAt(0).AsRegister<CpuRegister>().AsRegister());
3645   DCHECK_EQ(is_div ? RAX : RDX, out.AsRegister());
3646 
3647   if (second.IsConstant()) {
3648     int64_t imm = Int64FromConstant(second.GetConstant());
3649 
3650     if (imm == 0) {
3651       // Do not generate anything. DivZeroCheck would prevent any code to be executed.
3652     } else if (imm == 1 || imm == -1) {
3653       DivRemOneOrMinusOne(instruction);
3654     } else if (instruction->IsDiv() && IsPowerOfTwo(AbsOrMin(imm))) {
3655       DivByPowerOfTwo(instruction->AsDiv());
3656     } else {
3657       DCHECK(imm <= -2 || imm >= 2);
3658       GenerateDivRemWithAnyConstant(instruction);
3659     }
3660   } else {
3661     SlowPathCode* slow_path =
3662         new (codegen_->GetScopedAllocator()) DivRemMinusOneSlowPathX86_64(
3663             instruction, out.AsRegister(), type, is_div);
3664     codegen_->AddSlowPath(slow_path);
3665 
3666     CpuRegister second_reg = second.AsRegister<CpuRegister>();
3667     // 0x80000000(00000000)/-1 triggers an arithmetic exception!
3668     // Dividing by -1 is actually negation and -0x80000000(00000000) = 0x80000000(00000000)
3669     // so it's safe to just use negl instead of more complex comparisons.
3670     if (type == DataType::Type::kInt32) {
3671       __ cmpl(second_reg, Immediate(-1));
3672       __ j(kEqual, slow_path->GetEntryLabel());
3673       // edx:eax <- sign-extended of eax
3674       __ cdq();
3675       // eax = quotient, edx = remainder
3676       __ idivl(second_reg);
3677     } else {
3678       __ cmpq(second_reg, Immediate(-1));
3679       __ j(kEqual, slow_path->GetEntryLabel());
3680       // rdx:rax <- sign-extended of rax
3681       __ cqo();
3682       // rax = quotient, rdx = remainder
3683       __ idivq(second_reg);
3684     }
3685     __ Bind(slow_path->GetExitLabel());
3686   }
3687 }
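
// Illustrative sketch (not part of the code generator): the Java semantics that
// the cmpl/j(kEqual)/cdq/idivl sequence above must preserve when the divisor is
// only known at runtime. A divisor of zero never reaches this code because
// HDivZeroCheck throws first.
inline int32_t SketchDiv32(int32_t numerator, int32_t divisor) {
  if (divisor == -1) {
    // idivl would raise #DE for INT32_MIN / -1 (the quotient +2^31 is not
    // representable); Java instead defines that result to wrap back to
    // INT32_MIN, which plain negation produces. The unsigned negate below
    // avoids the corresponding signed-overflow UB in C++.
    return static_cast<int32_t>(0u - static_cast<uint32_t>(numerator));
  }
  return numerator / divisor;  // idivl: quotient truncated toward zero.
}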
3688 
3689 void LocationsBuilderX86_64::VisitDiv(HDiv* div) {
3690   LocationSummary* locations =
3691       new (GetGraph()->GetAllocator()) LocationSummary(div, LocationSummary::kNoCall);
3692   switch (div->GetResultType()) {
3693     case DataType::Type::kInt32:
3694     case DataType::Type::kInt64: {
3695       locations->SetInAt(0, Location::RegisterLocation(RAX));
3696       locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
3697       locations->SetOut(Location::SameAsFirstInput());
3698       // Intel uses edx:eax (rdx:rax for 64-bit operands) as the dividend.
3699       locations->AddTemp(Location::RegisterLocation(RDX));
3700       // We need to save the numerator while we tweak rax and rdx. As we are using imul in a way
3701       // which enforces results to be in RAX and RDX, things are simpler if we use RDX also as
3702       // output and request another temp.
3703       if (div->InputAt(1)->IsConstant()) {
3704         locations->AddTemp(Location::RequiresRegister());
3705       }
3706       break;
3707     }
3708 
3709     case DataType::Type::kFloat32:
3710     case DataType::Type::kFloat64: {
3711       locations->SetInAt(0, Location::RequiresFpuRegister());
3712       locations->SetInAt(1, Location::Any());
3713       locations->SetOut(Location::SameAsFirstInput());
3714       break;
3715     }
3716 
3717     default:
3718       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3719   }
3720 }
3721 
3722 void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) {
3723   LocationSummary* locations = div->GetLocations();
3724   Location first = locations->InAt(0);
3725   Location second = locations->InAt(1);
3726   DCHECK(first.Equals(locations->Out()));
3727 
3728   DataType::Type type = div->GetResultType();
3729   switch (type) {
3730     case DataType::Type::kInt32:
3731     case DataType::Type::kInt64: {
3732       GenerateDivRemIntegral(div);
3733       break;
3734     }
3735 
3736     case DataType::Type::kFloat32: {
3737       if (second.IsFpuRegister()) {
3738         __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3739       } else if (second.IsConstant()) {
3740         __ divss(first.AsFpuRegister<XmmRegister>(),
3741                  codegen_->LiteralFloatAddress(
3742                      second.GetConstant()->AsFloatConstant()->GetValue()));
3743       } else {
3744         DCHECK(second.IsStackSlot());
3745         __ divss(first.AsFpuRegister<XmmRegister>(),
3746                  Address(CpuRegister(RSP), second.GetStackIndex()));
3747       }
3748       break;
3749     }
3750 
3751     case DataType::Type::kFloat64: {
3752       if (second.IsFpuRegister()) {
3753         __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3754       } else if (second.IsConstant()) {
3755         __ divsd(first.AsFpuRegister<XmmRegister>(),
3756                  codegen_->LiteralDoubleAddress(
3757                      second.GetConstant()->AsDoubleConstant()->GetValue()));
3758       } else {
3759         DCHECK(second.IsDoubleStackSlot());
3760         __ divsd(first.AsFpuRegister<XmmRegister>(),
3761                  Address(CpuRegister(RSP), second.GetStackIndex()));
3762       }
3763       break;
3764     }
3765 
3766     default:
3767       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3768   }
3769 }
3770 
3771 void LocationsBuilderX86_64::VisitRem(HRem* rem) {
3772   DataType::Type type = rem->GetResultType();
3773   LocationSummary* locations =
3774     new (GetGraph()->GetAllocator()) LocationSummary(rem, LocationSummary::kNoCall);
3775 
3776   switch (type) {
3777     case DataType::Type::kInt32:
3778     case DataType::Type::kInt64: {
3779       locations->SetInAt(0, Location::RegisterLocation(RAX));
3780       locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
3781       // Intel uses rdx:rax as the dividend and puts the remainder in rdx.
3782       locations->SetOut(Location::RegisterLocation(RDX));
3783       // We need to save the numerator while we tweak RAX and RDX. The constant-divisor path
3784       // uses imul in a way which forces its results into RAX and RDX, so things are simpler
3785       // if we also request another temp to hold the numerator.
3786       if (rem->InputAt(1)->IsConstant()) {
3787         locations->AddTemp(Location::RequiresRegister());
3788       }
3789       break;
3790     }
3791 
3792     case DataType::Type::kFloat32:
3793     case DataType::Type::kFloat64: {
3794       locations->SetInAt(0, Location::Any());
3795       locations->SetInAt(1, Location::Any());
3796       locations->SetOut(Location::RequiresFpuRegister());
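      // Note: the FP remainder is presumably computed through the x87 unit (see GenerateRemFP),
      // and polling the x87 status word (fnstsw) needs RAX, hence the RAX temp below.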
3797       locations->AddTemp(Location::RegisterLocation(RAX));
3798       break;
3799     }
3800 
3801     default:
3802       LOG(FATAL) << "Unexpected rem type " << type;
3803   }
3804 }
3805 
3806 void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) {
3807   DataType::Type type = rem->GetResultType();
3808   switch (type) {
3809     case DataType::Type::kInt32:
3810     case DataType::Type::kInt64: {
3811       GenerateDivRemIntegral(rem);
3812       break;
3813     }
3814     case DataType::Type::kFloat32:
3815     case DataType::Type::kFloat64: {
3816       GenerateRemFP(rem);
3817       break;
3818     }
3819     default:
3820       LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
3821   }
3822 }
3823 
3824 void LocationsBuilderX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
3825   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
3826   locations->SetInAt(0, Location::Any());
3827 }
3828 
3829 void InstructionCodeGeneratorX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
3830   SlowPathCode* slow_path =
3831       new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathX86_64(instruction);
3832   codegen_->AddSlowPath(slow_path);
3833 
3834   LocationSummary* locations = instruction->GetLocations();
3835   Location value = locations->InAt(0);
3836 
3837   switch (instruction->GetType()) {
3838     case DataType::Type::kBool:
3839     case DataType::Type::kUint8:
3840     case DataType::Type::kInt8:
3841     case DataType::Type::kUint16:
3842     case DataType::Type::kInt16:
3843     case DataType::Type::kInt32: {
3844       if (value.IsRegister()) {
3845         __ testl(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
3846         __ j(kEqual, slow_path->GetEntryLabel());
3847       } else if (value.IsStackSlot()) {
3848         __ cmpl(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
3849         __ j(kEqual, slow_path->GetEntryLabel());
3850       } else {
3851         DCHECK(value.IsConstant()) << value;
3852         if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
3853           __ jmp(slow_path->GetEntryLabel());
3854         }
3855       }
3856       break;
3857     }
3858     case DataType::Type::kInt64: {
3859       if (value.IsRegister()) {
3860         __ testq(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
3861         __ j(kEqual, slow_path->GetEntryLabel());
3862       } else if (value.IsDoubleStackSlot()) {
3863         __ cmpq(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
3864         __ j(kEqual, slow_path->GetEntryLabel());
3865       } else {
3866         DCHECK(value.IsConstant()) << value;
3867         if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
3868           __ jmp(slow_path->GetEntryLabel());
3869         }
3870       }
3871       break;
3872     }
3873     default:
3874       LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
3875   }
3876 }
3877 
3878 void LocationsBuilderX86_64::HandleShift(HBinaryOperation* op) {
3879   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
3880 
3881   LocationSummary* locations =
3882       new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall);
3883 
3884   switch (op->GetResultType()) {
3885     case DataType::Type::kInt32:
3886     case DataType::Type::kInt64: {
3887       locations->SetInAt(0, Location::RequiresRegister());
3888       // The shift count needs to be in CL (unless it is a constant).
3889       locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, op->InputAt(1)));
3890       locations->SetOut(Location::SameAsFirstInput());
3891       break;
3892     }
3893     default:
3894       LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
3895   }
3896 }
3897 
3898 void InstructionCodeGeneratorX86_64::HandleShift(HBinaryOperation* op) {
3899   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
3900 
3901   LocationSummary* locations = op->GetLocations();
3902   CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
3903   Location second = locations->InAt(1);
3904 
3905   switch (op->GetResultType()) {
3906     case DataType::Type::kInt32: {
3907       if (second.IsRegister()) {
3908         CpuRegister second_reg = second.AsRegister<CpuRegister>();
3909         if (op->IsShl()) {
3910           __ shll(first_reg, second_reg);
3911         } else if (op->IsShr()) {
3912           __ sarl(first_reg, second_reg);
3913         } else {
3914           __ shrl(first_reg, second_reg);
3915         }
3916       } else {
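        // Constant shift counts are masked to the low five bits, matching both the hardware
        // behavior of 32-bit shifts and the Java language semantics.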
3917         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
3918         if (op->IsShl()) {
3919           __ shll(first_reg, imm);
3920         } else if (op->IsShr()) {
3921           __ sarl(first_reg, imm);
3922         } else {
3923           __ shrl(first_reg, imm);
3924         }
3925       }
3926       break;
3927     }
3928     case DataType::Type::kInt64: {
3929       if (second.IsRegister()) {
3930         CpuRegister second_reg = second.AsRegister<CpuRegister>();
3931         if (op->IsShl()) {
3932           __ shlq(first_reg, second_reg);
3933         } else if (op->IsShr()) {
3934           __ sarq(first_reg, second_reg);
3935         } else {
3936           __ shrq(first_reg, second_reg);
3937         }
3938       } else {
3939         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
3940         if (op->IsShl()) {
3941           __ shlq(first_reg, imm);
3942         } else if (op->IsShr()) {
3943           __ sarq(first_reg, imm);
3944         } else {
3945           __ shrq(first_reg, imm);
3946         }
3947       }
3948       break;
3949     }
3950     default:
3951       LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
3952       UNREACHABLE();
3953   }
3954 }
3955 
3956 void LocationsBuilderX86_64::VisitRor(HRor* ror) {
3957   LocationSummary* locations =
3958       new (GetGraph()->GetAllocator()) LocationSummary(ror, LocationSummary::kNoCall);
3959 
3960   switch (ror->GetResultType()) {
3961     case DataType::Type::kInt32:
3962     case DataType::Type::kInt64: {
3963       locations->SetInAt(0, Location::RequiresRegister());
3964       // The shift count needs to be in CL (unless it is a constant).
3965       locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, ror->InputAt(1)));
3966       locations->SetOut(Location::SameAsFirstInput());
3967       break;
3968     }
3969     default:
3970       LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
3971       UNREACHABLE();
3972   }
3973 }
3974 
3975 void InstructionCodeGeneratorX86_64::VisitRor(HRor* ror) {
3976   LocationSummary* locations = ror->GetLocations();
3977   CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
3978   Location second = locations->InAt(1);
3979 
3980   switch (ror->GetResultType()) {
3981     case DataType::Type::kInt32:
3982       if (second.IsRegister()) {
3983         CpuRegister second_reg = second.AsRegister<CpuRegister>();
3984         __ rorl(first_reg, second_reg);
3985       } else {
3986         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
3987         __ rorl(first_reg, imm);
3988       }
3989       break;
3990     case DataType::Type::kInt64:
3991       if (second.IsRegister()) {
3992         CpuRegister second_reg = second.AsRegister<CpuRegister>();
3993         __ rorq(first_reg, second_reg);
3994       } else {
3995         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
3996         __ rorq(first_reg, imm);
3997       }
3998       break;
3999     default:
4000       LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
4001       UNREACHABLE();
4002   }
4003 }
4004 
4005 void LocationsBuilderX86_64::VisitShl(HShl* shl) {
4006   HandleShift(shl);
4007 }
4008 
4009 void InstructionCodeGeneratorX86_64::VisitShl(HShl* shl) {
4010   HandleShift(shl);
4011 }
4012 
4013 void LocationsBuilderX86_64::VisitShr(HShr* shr) {
4014   HandleShift(shr);
4015 }
4016 
4017 void InstructionCodeGeneratorX86_64::VisitShr(HShr* shr) {
4018   HandleShift(shr);
4019 }
4020 
4021 void LocationsBuilderX86_64::VisitUShr(HUShr* ushr) {
4022   HandleShift(ushr);
4023 }
4024 
4025 void InstructionCodeGeneratorX86_64::VisitUShr(HUShr* ushr) {
4026   HandleShift(ushr);
4027 }
4028 
4029 void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
4030   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
4031       instruction, LocationSummary::kCallOnMainOnly);
4032   InvokeRuntimeCallingConvention calling_convention;
4033   if (instruction->IsStringAlloc()) {
4034     locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
4035   } else {
4036     locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
4037   }
4038   locations->SetOut(Location::RegisterLocation(RAX));
4039 }
4040 
4041 void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) {
4042   // Note: if heap poisoning is enabled, the entry point takes care
4043   // of poisoning the reference.
4044   if (instruction->IsStringAlloc()) {
4045     // String is allocated through StringFactory. Call NewEmptyString entry point.
4046     CpuRegister temp = instruction->GetLocations()->GetTemp(0).AsRegister<CpuRegister>();
4047     MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64PointerSize);
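    // The pNewEmptyString entrypoint slot of the current Thread (addressed GS-relative, not
    // RIP-relative) holds the method to invoke; call it through its quick-compiled code entry.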
4048     __ gs()->movq(temp, Address::Absolute(QUICK_ENTRY_POINT(pNewEmptyString), /* no_rip */ true));
4049     __ call(Address(temp, code_offset.SizeValue()));
4050     codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
4051   } else {
4052     codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
4053     CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
4054     DCHECK(!codegen_->IsLeafMethod());
4055   }
4056 }
4057 
4058 void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) {
4059   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
4060       instruction, LocationSummary::kCallOnMainOnly);
4061   InvokeRuntimeCallingConvention calling_convention;
4062   locations->SetOut(Location::RegisterLocation(RAX));
4063   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
4064   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
4065 }
4066 
4067 void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) {
4068   // Note: if heap poisoning is enabled, the entry point takes care
4069   // of poisoning the reference.
4070   QuickEntrypointEnum entrypoint =
4071       CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass());
4072   codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
4073   CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
4074   DCHECK(!codegen_->IsLeafMethod());
4075 }
4076 
4077 void LocationsBuilderX86_64::VisitParameterValue(HParameterValue* instruction) {
4078   LocationSummary* locations =
4079       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4080   Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
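  // Arguments passed on the stack live in the caller's frame, so once this method's frame is
  // set up they are found at their incoming offset plus the current frame size.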
4081   if (location.IsStackSlot()) {
4082     location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
4083   } else if (location.IsDoubleStackSlot()) {
4084     location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
4085   }
4086   locations->SetOut(location);
4087 }
4088 
4089 void InstructionCodeGeneratorX86_64::VisitParameterValue(
4090     HParameterValue* instruction ATTRIBUTE_UNUSED) {
4091   // Nothing to do, the parameter is already at its location.
4092 }
4093 
4094 void LocationsBuilderX86_64::VisitCurrentMethod(HCurrentMethod* instruction) {
4095   LocationSummary* locations =
4096       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4097   locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument));
4098 }
4099 
4100 void InstructionCodeGeneratorX86_64::VisitCurrentMethod(
4101     HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
4102   // Nothing to do, the method is already at its location.
4103 }
4104 
4105 void LocationsBuilderX86_64::VisitClassTableGet(HClassTableGet* instruction) {
4106   LocationSummary* locations =
4107       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4108   locations->SetInAt(0, Location::RequiresRegister());
4109   locations->SetOut(Location::RequiresRegister());
4110 }
4111 
4112 void InstructionCodeGeneratorX86_64::VisitClassTableGet(HClassTableGet* instruction) {
4113   LocationSummary* locations = instruction->GetLocations();
4114   if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
4115     uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
4116         instruction->GetIndex(), kX86_64PointerSize).SizeValue();
4117     __ movq(locations->Out().AsRegister<CpuRegister>(),
4118             Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset));
4119   } else {
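    // Interface methods take an extra indirection: load the IMT pointer from the class, then
    // the method entry at the element's offset inside that table.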
4120     uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
4121         instruction->GetIndex(), kX86_64PointerSize));
4122     __ movq(locations->Out().AsRegister<CpuRegister>(),
4123             Address(locations->InAt(0).AsRegister<CpuRegister>(),
4124             mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
4125     __ movq(locations->Out().AsRegister<CpuRegister>(),
4126             Address(locations->Out().AsRegister<CpuRegister>(), method_offset));
4127   }
4128 }
4129 
4130 void LocationsBuilderX86_64::VisitNot(HNot* not_) {
4131   LocationSummary* locations =
4132       new (GetGraph()->GetAllocator()) LocationSummary(not_, LocationSummary::kNoCall);
4133   locations->SetInAt(0, Location::RequiresRegister());
4134   locations->SetOut(Location::SameAsFirstInput());
4135 }
4136 
4137 void InstructionCodeGeneratorX86_64::VisitNot(HNot* not_) {
4138   LocationSummary* locations = not_->GetLocations();
4139   DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
4140             locations->Out().AsRegister<CpuRegister>().AsRegister());
4141   Location out = locations->Out();
4142   switch (not_->GetResultType()) {
4143     case DataType::Type::kInt32:
4144       __ notl(out.AsRegister<CpuRegister>());
4145       break;
4146 
4147     case DataType::Type::kInt64:
4148       __ notq(out.AsRegister<CpuRegister>());
4149       break;
4150 
4151     default:
4152       LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
4153   }
4154 }
4155 
4156 void LocationsBuilderX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
4157   LocationSummary* locations =
4158       new (GetGraph()->GetAllocator()) LocationSummary(bool_not, LocationSummary::kNoCall);
4159   locations->SetInAt(0, Location::RequiresRegister());
4160   locations->SetOut(Location::SameAsFirstInput());
4161 }
4162 
4163 void InstructionCodeGeneratorX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
4164   LocationSummary* locations = bool_not->GetLocations();
4165   DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
4166             locations->Out().AsRegister<CpuRegister>().AsRegister());
4167   Location out = locations->Out();
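  // Booleans are materialized as 0 or 1, so flipping the lowest bit implements the negation.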
4168   __ xorl(out.AsRegister<CpuRegister>(), Immediate(1));
4169 }
4170 
4171 void LocationsBuilderX86_64::VisitPhi(HPhi* instruction) {
4172   LocationSummary* locations =
4173       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4174   for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
4175     locations->SetInAt(i, Location::Any());
4176   }
4177   locations->SetOut(Location::Any());
4178 }
4179 
4180 void InstructionCodeGeneratorX86_64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
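  // Phis are expected to have been resolved by the register allocator into parallel moves at
  // block boundaries, so the code generator should never visit one.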
4181   LOG(FATAL) << "Unimplemented";
4182 }
4183 
4184 void CodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) {
4185   /*
4186    * According to the JSR-133 Cookbook, for x86-64 only StoreLoad/AnyAny barriers need a memory fence.
4187    * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86-64 memory model.
4188    * For those cases, all we need to ensure is that there is a scheduling barrier in place.
4189    */
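  // Note: MemoryFence() is assumed to provide the StoreLoad ordering, typically with either an
  // mfence or a locked read-modify-write of a stack slot.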
4190   switch (kind) {
4191     case MemBarrierKind::kAnyAny: {
4192       MemoryFence();
4193       break;
4194     }
4195     case MemBarrierKind::kAnyStore:
4196     case MemBarrierKind::kLoadAny:
4197     case MemBarrierKind::kStoreStore: {
4198       // nop
4199       break;
4200     }
4201     case MemBarrierKind::kNTStoreStore:
4202       // Non-Temporal Store/Store needs an explicit fence.
4203       MemoryFence(/* non-temporal */ true);
4204       break;
4205   }
4206 }
4207 
4208 void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) {
4209   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
4210 
4211   bool object_field_get_with_read_barrier =
4212       kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
4213   LocationSummary* locations =
4214       new (GetGraph()->GetAllocator()) LocationSummary(instruction,
4215                                                        object_field_get_with_read_barrier
4216                                                            ? LocationSummary::kCallOnSlowPath
4217                                                            : LocationSummary::kNoCall);
4218   if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
4219     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
4220   }
4221   locations->SetInAt(0, Location::RequiresRegister());
4222   if (DataType::IsFloatingPointType(instruction->GetType())) {
4223     locations->SetOut(Location::RequiresFpuRegister());
4224   } else {
4225     // The output overlaps for an object field get when read barriers
4226     // are enabled: we do not want the move to overwrite the object's
4227     // location, as we need it to emit the read barrier.
4228     locations->SetOut(
4229         Location::RequiresRegister(),
4230         object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
4231   }
4232 }
4233 
4234 void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
4235                                                     const FieldInfo& field_info) {
4236   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
4237 
4238   LocationSummary* locations = instruction->GetLocations();
4239   Location base_loc = locations->InAt(0);
4240   CpuRegister base = base_loc.AsRegister<CpuRegister>();
4241   Location out = locations->Out();
4242   bool is_volatile = field_info.IsVolatile();
4243   DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
4244   DataType::Type load_type = instruction->GetType();
4245   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
4246 
4247   switch (load_type) {
4248     case DataType::Type::kBool:
4249     case DataType::Type::kUint8: {
4250       __ movzxb(out.AsRegister<CpuRegister>(), Address(base, offset));
4251       break;
4252     }
4253 
4254     case DataType::Type::kInt8: {
4255       __ movsxb(out.AsRegister<CpuRegister>(), Address(base, offset));
4256       break;
4257     }
4258 
4259     case DataType::Type::kUint16: {
4260       __ movzxw(out.AsRegister<CpuRegister>(), Address(base, offset));
4261       break;
4262     }
4263 
4264     case DataType::Type::kInt16: {
4265       __ movsxw(out.AsRegister<CpuRegister>(), Address(base, offset));
4266       break;
4267     }
4268 
4269     case DataType::Type::kInt32: {
4270       __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
4271       break;
4272     }
4273 
4274     case DataType::Type::kReference: {
4275       // /* HeapReference<Object> */ out = *(base + offset)
4276       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
4277         // Note that a potential implicit null check is handled in this
4278         // CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call.
4279         codegen_->GenerateFieldLoadWithBakerReadBarrier(
4280             instruction, out, base, offset, /* needs_null_check */ true);
4281         if (is_volatile) {
4282           codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4283         }
4284       } else {
4285         __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
4286         codegen_->MaybeRecordImplicitNullCheck(instruction);
4287         if (is_volatile) {
4288           codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4289         }
4290         // If read barriers are enabled, emit read barriers other than
4291         // Baker's using a slow path (and also unpoison the loaded
4292         // reference, if heap poisoning is enabled).
4293         codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
4294       }
4295       break;
4296     }
4297 
4298     case DataType::Type::kInt64: {
4299       __ movq(out.AsRegister<CpuRegister>(), Address(base, offset));
4300       break;
4301     }
4302 
4303     case DataType::Type::kFloat32: {
4304       __ movss(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
4305       break;
4306     }
4307 
4308     case DataType::Type::kFloat64: {
4309       __ movsd(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
4310       break;
4311     }
4312 
4313     case DataType::Type::kUint32:
4314     case DataType::Type::kUint64:
4315     case DataType::Type::kVoid:
4316       LOG(FATAL) << "Unreachable type " << load_type;
4317       UNREACHABLE();
4318   }
4319 
4320   if (load_type == DataType::Type::kReference) {
4321     // Potential implicit null checks, in the case of reference
4322     // fields, are handled in the previous switch statement.
4323   } else {
4324     codegen_->MaybeRecordImplicitNullCheck(instruction);
4325   }
4326 
4327   if (is_volatile) {
4328     if (load_type == DataType::Type::kReference) {
4329       // Memory barriers, in the case of references, are also handled
4330       // in the previous switch statement.
4331     } else {
4332       codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4333     }
4334   }
4335 }
4336 
4337 void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction,
4338                                             const FieldInfo& field_info) {
4339   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
4340 
4341   LocationSummary* locations =
4342       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4343   DataType::Type field_type = field_info.GetFieldType();
4344   bool is_volatile = field_info.IsVolatile();
4345   bool needs_write_barrier =
4346       CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
4347 
4348   locations->SetInAt(0, Location::RequiresRegister());
4349   if (DataType::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
4350     if (is_volatile) {
4351       // In order to satisfy the semantics of volatile, this must be a single instruction store.
4352       locations->SetInAt(1, Location::FpuRegisterOrInt32Constant(instruction->InputAt(1)));
4353     } else {
4354       locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
4355     }
4356   } else {
4357     if (is_volatile) {
4358       // In order to satisfy the semantics of volatile, this must be a single instruction store.
4359       locations->SetInAt(1, Location::RegisterOrInt32Constant(instruction->InputAt(1)));
4360     } else {
4361       locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
4362     }
4363   }
4364   if (needs_write_barrier) {
4365     // Temporary registers for the write barrier.
4366     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
4367     locations->AddTemp(Location::RequiresRegister());
4368   } else if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
4369     // Temporary register for the reference poisoning.
4370     locations->AddTemp(Location::RequiresRegister());
4371   }
4372 }
4373 
4374 void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
4375                                                     const FieldInfo& field_info,
4376                                                     bool value_can_be_null) {
4377   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
4378 
4379   LocationSummary* locations = instruction->GetLocations();
4380   CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>();
4381   Location value = locations->InAt(1);
4382   bool is_volatile = field_info.IsVolatile();
4383   DataType::Type field_type = field_info.GetFieldType();
4384   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
4385 
4386   if (is_volatile) {
4387     codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
4388   }
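  // For a volatile store, the AnyStore barrier above orders earlier accesses before the store,
  // and the AnyAny barrier emitted at the end of this method orders the store before later ones.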
4389 
4390   bool maybe_record_implicit_null_check_done = false;
4391 
4392   switch (field_type) {
4393     case DataType::Type::kBool:
4394     case DataType::Type::kUint8:
4395     case DataType::Type::kInt8: {
4396       if (value.IsConstant()) {
4397         __ movb(Address(base, offset),
4398                 Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
4399       } else {
4400         __ movb(Address(base, offset), value.AsRegister<CpuRegister>());
4401       }
4402       break;
4403     }
4404 
4405     case DataType::Type::kUint16:
4406     case DataType::Type::kInt16: {
4407       if (value.IsConstant()) {
4408         __ movw(Address(base, offset),
4409                 Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
4410       } else {
4411         __ movw(Address(base, offset), value.AsRegister<CpuRegister>());
4412       }
4413       break;
4414     }
4415 
4416     case DataType::Type::kInt32:
4417     case DataType::Type::kReference: {
4418       if (value.IsConstant()) {
4419         int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
4420         // `field_type == DataType::Type::kReference` implies `v == 0`.
4421         DCHECK((field_type != DataType::Type::kReference) || (v == 0));
4422         // Note: if heap poisoning is enabled, no need to poison
4423         // (negate) `v` if it is a reference, as it would be null.
4424         __ movl(Address(base, offset), Immediate(v));
4425       } else {
4426         if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
4427           CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
4428           __ movl(temp, value.AsRegister<CpuRegister>());
4429           __ PoisonHeapReference(temp);
4430           __ movl(Address(base, offset), temp);
4431         } else {
4432           __ movl(Address(base, offset), value.AsRegister<CpuRegister>());
4433         }
4434       }
4435       break;
4436     }
4437 
4438     case DataType::Type::kInt64: {
4439       if (value.IsConstant()) {
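        // There is no single x86-64 store of a full 64-bit immediate; MoveInt64ToAddress is
        // expected to either store a sign-extended 32-bit immediate or split the value into two
        // 32-bit stores, and it records the implicit null check itself, hence the flag below.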
4440         int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
4441         codegen_->MoveInt64ToAddress(Address(base, offset),
4442                                      Address(base, offset + sizeof(int32_t)),
4443                                      v,
4444                                      instruction);
4445         maybe_record_implicit_null_check_done = true;
4446       } else {
4447         __ movq(Address(base, offset), value.AsRegister<CpuRegister>());
4448       }
4449       break;
4450     }
4451 
4452     case DataType::Type::kFloat32: {
4453       if (value.IsConstant()) {
4454         int32_t v =
4455             bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
4456         __ movl(Address(base, offset), Immediate(v));
4457       } else {
4458         __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>());
4459       }
4460       break;
4461     }
4462 
4463     case DataType::Type::kFloat64: {
4464       if (value.IsConstant()) {
4465         int64_t v =
4466             bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
4467         codegen_->MoveInt64ToAddress(Address(base, offset),
4468                                      Address(base, offset + sizeof(int32_t)),
4469                                      v,
4470                                      instruction);
4471         maybe_record_implicit_null_check_done = true;
4472       } else {
4473         __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>());
4474       }
4475       break;
4476     }
4477 
4478     case DataType::Type::kUint32:
4479     case DataType::Type::kUint64:
4480     case DataType::Type::kVoid:
4481       LOG(FATAL) << "Unreachable type " << field_type;
4482       UNREACHABLE();
4483   }
4484 
4485   if (!maybe_record_implicit_null_check_done) {
4486     codegen_->MaybeRecordImplicitNullCheck(instruction);
4487   }
4488 
4489   if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
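    // Dirty the card covering the holder object so the GC will re-scan it for the reference
    // that was just stored.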
4490     CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
4491     CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
4492     codegen_->MarkGCCard(temp, card, base, value.AsRegister<CpuRegister>(), value_can_be_null);
4493   }
4494 
4495   if (is_volatile) {
4496     codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
4497   }
4498 }
4499 
4500 void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
4501   HandleFieldSet(instruction, instruction->GetFieldInfo());
4502 }
4503 
4504 void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
4505   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
4506 }
4507 
4508 void LocationsBuilderX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
4509   HandleFieldGet(instruction);
4510 }
4511 
4512 void InstructionCodeGeneratorX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
4513   HandleFieldGet(instruction, instruction->GetFieldInfo());
4514 }
4515 
4516 void LocationsBuilderX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
4517   HandleFieldGet(instruction);
4518 }
4519 
4520 void InstructionCodeGeneratorX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
4521   HandleFieldGet(instruction, instruction->GetFieldInfo());
4522 }
4523 
4524 void LocationsBuilderX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
4525   HandleFieldSet(instruction, instruction->GetFieldInfo());
4526 }
4527 
4528 void InstructionCodeGeneratorX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
4529   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
4530 }
4531 
4532 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldGet(
4533     HUnresolvedInstanceFieldGet* instruction) {
4534   FieldAccessCallingConventionX86_64 calling_convention;
4535   codegen_->CreateUnresolvedFieldLocationSummary(
4536       instruction, instruction->GetFieldType(), calling_convention);
4537 }
4538 
4539 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldGet(
4540     HUnresolvedInstanceFieldGet* instruction) {
4541   FieldAccessCallingConventionX86_64 calling_convention;
4542   codegen_->GenerateUnresolvedFieldAccess(instruction,
4543                                           instruction->GetFieldType(),
4544                                           instruction->GetFieldIndex(),
4545                                           instruction->GetDexPc(),
4546                                           calling_convention);
4547 }
4548 
4549 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldSet(
4550     HUnresolvedInstanceFieldSet* instruction) {
4551   FieldAccessCallingConventionX86_64 calling_convention;
4552   codegen_->CreateUnresolvedFieldLocationSummary(
4553       instruction, instruction->GetFieldType(), calling_convention);
4554 }
4555 
4556 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldSet(
4557     HUnresolvedInstanceFieldSet* instruction) {
4558   FieldAccessCallingConventionX86_64 calling_convention;
4559   codegen_->GenerateUnresolvedFieldAccess(instruction,
4560                                           instruction->GetFieldType(),
4561                                           instruction->GetFieldIndex(),
4562                                           instruction->GetDexPc(),
4563                                           calling_convention);
4564 }
4565 
4566 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldGet(
4567     HUnresolvedStaticFieldGet* instruction) {
4568   FieldAccessCallingConventionX86_64 calling_convention;
4569   codegen_->CreateUnresolvedFieldLocationSummary(
4570       instruction, instruction->GetFieldType(), calling_convention);
4571 }
4572 
4573 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldGet(
4574     HUnresolvedStaticFieldGet* instruction) {
4575   FieldAccessCallingConventionX86_64 calling_convention;
4576   codegen_->GenerateUnresolvedFieldAccess(instruction,
4577                                           instruction->GetFieldType(),
4578                                           instruction->GetFieldIndex(),
4579                                           instruction->GetDexPc(),
4580                                           calling_convention);
4581 }
4582 
4583 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldSet(
4584     HUnresolvedStaticFieldSet* instruction) {
4585   FieldAccessCallingConventionX86_64 calling_convention;
4586   codegen_->CreateUnresolvedFieldLocationSummary(
4587       instruction, instruction->GetFieldType(), calling_convention);
4588 }
4589 
4590 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldSet(
4591     HUnresolvedStaticFieldSet* instruction) {
4592   FieldAccessCallingConventionX86_64 calling_convention;
4593   codegen_->GenerateUnresolvedFieldAccess(instruction,
4594                                           instruction->GetFieldType(),
4595                                           instruction->GetFieldIndex(),
4596                                           instruction->GetDexPc(),
4597                                           calling_convention);
4598 }
4599 
4600 void LocationsBuilderX86_64::VisitNullCheck(HNullCheck* instruction) {
4601   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
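  // An implicit null check dereferences the object, so it needs a register; an explicit check
  // can also handle a stack slot or a (null) constant.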
4602   Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks()
4603       ? Location::RequiresRegister()
4604       : Location::Any();
4605   locations->SetInAt(0, loc);
4606 }
4607 
4608 void CodeGeneratorX86_64::GenerateImplicitNullCheck(HNullCheck* instruction) {
4609   if (CanMoveNullCheckToUser(instruction)) {
4610     return;
4611   }
4612   LocationSummary* locations = instruction->GetLocations();
4613   Location obj = locations->InAt(0);
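  // The testl below merely reads [obj + 0]: a null `obj` faults, and the runtime's fault
  // handler uses the PC recorded here to raise the NullPointerException. RAX is just an
  // arbitrary register operand for the encoding.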
4614 
4615   __ testl(CpuRegister(RAX), Address(obj.AsRegister<CpuRegister>(), 0));
4616   RecordPcInfo(instruction, instruction->GetDexPc());
4617 }
4618 
4619 void CodeGeneratorX86_64::GenerateExplicitNullCheck(HNullCheck* instruction) {
4620   SlowPathCode* slow_path = new (GetScopedAllocator()) NullCheckSlowPathX86_64(instruction);
4621   AddSlowPath(slow_path);
4622 
4623   LocationSummary* locations = instruction->GetLocations();
4624   Location obj = locations->InAt(0);
4625 
4626   if (obj.IsRegister()) {
4627     __ testl(obj.AsRegister<CpuRegister>(), obj.AsRegister<CpuRegister>());
4628   } else if (obj.IsStackSlot()) {
4629     __ cmpl(Address(CpuRegister(RSP), obj.GetStackIndex()), Immediate(0));
4630   } else {
4631     DCHECK(obj.IsConstant()) << obj;
4632     DCHECK(obj.GetConstant()->IsNullConstant());
4633     __ jmp(slow_path->GetEntryLabel());
4634     return;
4635   }
4636   __ j(kEqual, slow_path->GetEntryLabel());
4637 }
4638 
4639 void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) {
4640   codegen_->GenerateNullCheck(instruction);
4641 }
4642 
4643 void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) {
4644   bool object_array_get_with_read_barrier =
4645       kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
4646   LocationSummary* locations =
4647       new (GetGraph()->GetAllocator()) LocationSummary(instruction,
4648                                                        object_array_get_with_read_barrier
4649                                                            ? LocationSummary::kCallOnSlowPath
4650                                                            : LocationSummary::kNoCall);
4651   if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
4652     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
4653   }
4654   locations->SetInAt(0, Location::RequiresRegister());
4655   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
4656   if (DataType::IsFloatingPointType(instruction->GetType())) {
4657     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4658   } else {
4659     // The output overlaps for an object array get when read barriers
4660     // are enabled: we do not want the move to overwrite the array's
4661     // location, as we need it to emit the read barrier.
4662     locations->SetOut(
4663         Location::RequiresRegister(),
4664         object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
4665   }
4666 }
4667 
4668 void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
4669   LocationSummary* locations = instruction->GetLocations();
4670   Location obj_loc = locations->InAt(0);
4671   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
4672   Location index = locations->InAt(1);
4673   Location out_loc = locations->Out();
4674   uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
4675 
4676   DataType::Type type = instruction->GetType();
4677   switch (type) {
4678     case DataType::Type::kBool:
4679     case DataType::Type::kUint8: {
4680       CpuRegister out = out_loc.AsRegister<CpuRegister>();
4681       __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
4682       break;
4683     }
4684 
4685     case DataType::Type::kInt8: {
4686       CpuRegister out = out_loc.AsRegister<CpuRegister>();
4687       __ movsxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
4688       break;
4689     }
4690 
4691     case DataType::Type::kUint16: {
4692       CpuRegister out = out_loc.AsRegister<CpuRegister>();
4693       if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
4694         // Branch into the compressed and uncompressed cases, which use different element sizes.
4695         uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
4696         NearLabel done, not_compressed;
4697         __ testb(Address(obj, count_offset), Immediate(1));
4698         codegen_->MaybeRecordImplicitNullCheck(instruction);
4699         static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
4700                       "Expecting 0=compressed, 1=uncompressed");
4701         __ j(kNotZero, &not_compressed);
4702         __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
4703         __ jmp(&done);
4704         __ Bind(&not_compressed);
4705         __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
4706         __ Bind(&done);
4707       } else {
4708         __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
4709       }
4710       break;
4711     }
4712 
4713     case DataType::Type::kInt16: {
4714       CpuRegister out = out_loc.AsRegister<CpuRegister>();
4715       __ movsxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
4716       break;
4717     }
4718 
4719     case DataType::Type::kInt32: {
4720       CpuRegister out = out_loc.AsRegister<CpuRegister>();
4721       __ movl(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
4722       break;
4723     }
4724 
4725     case DataType::Type::kReference: {
4726       static_assert(
4727           sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
4728           "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
4729       // /* HeapReference<Object> */ out =
4730       //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
4731       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
4732         // Note that a potential implicit null check is handled in this
4733         // CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call.
4734         codegen_->GenerateArrayLoadWithBakerReadBarrier(
4735             instruction, out_loc, obj, data_offset, index, /* needs_null_check */ true);
4736       } else {
4737         CpuRegister out = out_loc.AsRegister<CpuRegister>();
4738         __ movl(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
4739         codegen_->MaybeRecordImplicitNullCheck(instruction);
4740         // If read barriers are enabled, emit read barriers other than
4741         // Baker's using a slow path (and also unpoison the loaded
4742         // reference, if heap poisoning is enabled).
4743         if (index.IsConstant()) {
4744           uint32_t offset =
4745               (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
4746           codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
4747         } else {
4748           codegen_->MaybeGenerateReadBarrierSlow(
4749               instruction, out_loc, out_loc, obj_loc, data_offset, index);
4750         }
4751       }
4752       break;
4753     }
4754 
4755     case DataType::Type::kInt64: {
4756       CpuRegister out = out_loc.AsRegister<CpuRegister>();
4757       __ movq(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_8, data_offset));
4758       break;
4759     }
4760 
4761     case DataType::Type::kFloat32: {
4762       XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
4763       __ movss(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
4764       break;
4765     }
4766 
4767     case DataType::Type::kFloat64: {
4768       XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
4769       __ movsd(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_8, data_offset));
4770       break;
4771     }
4772 
4773     case DataType::Type::kUint32:
4774     case DataType::Type::kUint64:
4775     case DataType::Type::kVoid:
4776       LOG(FATAL) << "Unreachable type " << type;
4777       UNREACHABLE();
4778   }
4779 
4780   if (type == DataType::Type::kReference) {
4781     // Potential implicit null checks, in the case of reference
4782     // arrays, are handled in the previous switch statement.
4783   } else {
4784     codegen_->MaybeRecordImplicitNullCheck(instruction);
4785   }
4786 }
4787 
4788 void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) {
4789   DataType::Type value_type = instruction->GetComponentType();
4790 
4791   bool needs_write_barrier =
4792       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
4793   bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
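  // A type check (and therefore a slow path) is only needed when storing a reference that is
  // not statically known to be assignable to the array's component type.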
4794 
4795   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
4796       instruction,
4797       may_need_runtime_call_for_type_check ?
4798           LocationSummary::kCallOnSlowPath :
4799           LocationSummary::kNoCall);
4800 
4801   locations->SetInAt(0, Location::RequiresRegister());
4802   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
4803   if (DataType::IsFloatingPointType(value_type)) {
4804     locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
4805   } else {
4806     locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
4807   }
4808 
4809   if (needs_write_barrier) {
4810     // Temporary registers for the write barrier.
4811     locations->AddTemp(Location::RequiresRegister());  // Possibly used for ref. poisoning too.
4812     locations->AddTemp(Location::RequiresRegister());
4813   }
4814 }
4815 
4816 void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
4817   LocationSummary* locations = instruction->GetLocations();
4818   Location array_loc = locations->InAt(0);
4819   CpuRegister array = array_loc.AsRegister<CpuRegister>();
4820   Location index = locations->InAt(1);
4821   Location value = locations->InAt(2);
4822   DataType::Type value_type = instruction->GetComponentType();
4823   bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
4824   bool needs_write_barrier =
4825       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
4826   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
4827   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
4828   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
4829 
4830   switch (value_type) {
4831     case DataType::Type::kBool:
4832     case DataType::Type::kUint8:
4833     case DataType::Type::kInt8: {
4834       uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
4835       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_1, offset);
4836       if (value.IsRegister()) {
4837         __ movb(address, value.AsRegister<CpuRegister>());
4838       } else {
4839         __ movb(address, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
4840       }
4841       codegen_->MaybeRecordImplicitNullCheck(instruction);
4842       break;
4843     }
4844 
4845     case DataType::Type::kUint16:
4846     case DataType::Type::kInt16: {
4847       uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
4848       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_2, offset);
4849       if (value.IsRegister()) {
4850         __ movw(address, value.AsRegister<CpuRegister>());
4851       } else {
4852         DCHECK(value.IsConstant()) << value;
4853         __ movw(address, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
4854       }
4855       codegen_->MaybeRecordImplicitNullCheck(instruction);
4856       break;
4857     }
4858 
4859     case DataType::Type::kReference: {
4860       uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
4861       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
4862 
4863       if (!value.IsRegister()) {
4864         // Just setting null.
4865         DCHECK(instruction->InputAt(2)->IsNullConstant());
4866         DCHECK(value.IsConstant()) << value;
4867         __ movl(address, Immediate(0));
4868         codegen_->MaybeRecordImplicitNullCheck(instruction);
4869         DCHECK(!needs_write_barrier);
4870         DCHECK(!may_need_runtime_call_for_type_check);
4871         break;
4872       }
4873 
4874       DCHECK(needs_write_barrier);
4875       CpuRegister register_value = value.AsRegister<CpuRegister>();
4876       // We cannot use a NearLabel for `done`, as its range may be too
4877       // short when Baker read barriers are enabled.
4878       Label done;
4879       NearLabel not_null, do_put;
4880       SlowPathCode* slow_path = nullptr;
4881       Location temp_loc = locations->GetTemp(0);
4882       CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
4883       if (may_need_runtime_call_for_type_check) {
4884         slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathX86_64(instruction);
4885         codegen_->AddSlowPath(slow_path);
4886         if (instruction->GetValueCanBeNull()) {
4887           __ testl(register_value, register_value);
4888           __ j(kNotEqual, &not_null);
4889           __ movl(address, Immediate(0));
4890           codegen_->MaybeRecordImplicitNullCheck(instruction);
4891           __ jmp(&done);
4892           __ Bind(&not_null);
4893         }
4894 
4895         // Note that when Baker read barriers are enabled, the type
4896         // checks are performed without read barriers.  This is fine,
4897         // even in the case where a class object is in the from-space
4898         // after the flip, as a comparison involving such a type would
4899         // not produce a false positive; it may of course produce a
4900         // false negative, in which case we would take the ArraySet
4901         // slow path.
4902 
4903         // /* HeapReference<Class> */ temp = array->klass_
4904         __ movl(temp, Address(array, class_offset));
4905         codegen_->MaybeRecordImplicitNullCheck(instruction);
4906         __ MaybeUnpoisonHeapReference(temp);
4907 
4908         // /* HeapReference<Class> */ temp = temp->component_type_
4909         __ movl(temp, Address(temp, component_offset));
4910         // If heap poisoning is enabled, no need to unpoison `temp`
4911         // nor the object reference in `register_value->klass`, as
4912         // we are comparing two poisoned references.
4913         __ cmpl(temp, Address(register_value, class_offset));
4914 
4915         if (instruction->StaticTypeOfArrayIsObjectArray()) {
4916           __ j(kEqual, &do_put);
4917           // If heap poisoning is enabled, the `temp` reference has
4918           // not been unpoisoned yet; unpoison it now.
4919           __ MaybeUnpoisonHeapReference(temp);
4920 
4921           // If heap poisoning is enabled, no need to unpoison the
4922           // heap reference loaded below, as it is only used for a
4923           // comparison with null.
4924           __ cmpl(Address(temp, super_offset), Immediate(0));
4925           __ j(kNotEqual, slow_path->GetEntryLabel());
4926           __ Bind(&do_put);
4927         } else {
4928           __ j(kNotEqual, slow_path->GetEntryLabel());
4929         }
4930       }
4931 
4932       if (kPoisonHeapReferences) {
4933         __ movl(temp, register_value);
4934         __ PoisonHeapReference(temp);
4935         __ movl(address, temp);
4936       } else {
4937         __ movl(address, register_value);
4938       }
4939       if (!may_need_runtime_call_for_type_check) {
4940         codegen_->MaybeRecordImplicitNullCheck(instruction);
4941       }
4942 
4943       CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
4944       codegen_->MarkGCCard(
4945           temp, card, array, value.AsRegister<CpuRegister>(), instruction->GetValueCanBeNull());
4946       __ Bind(&done);
4947 
4948       if (slow_path != nullptr) {
4949         __ Bind(slow_path->GetExitLabel());
4950       }
4951 
4952       break;
4953     }
4954 
4955     case DataType::Type::kInt32: {
4956       uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
4957       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
4958       if (value.IsRegister()) {
4959         __ movl(address, value.AsRegister<CpuRegister>());
4960       } else {
4961         DCHECK(value.IsConstant()) << value;
4962         int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
4963         __ movl(address, Immediate(v));
4964       }
4965       codegen_->MaybeRecordImplicitNullCheck(instruction);
4966       break;
4967     }
4968 
4969     case DataType::Type::kInt64: {
4970       uint32_t offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
4971       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset);
4972       if (value.IsRegister()) {
4973         __ movq(address, value.AsRegister<CpuRegister>());
4974         codegen_->MaybeRecordImplicitNullCheck(instruction);
4975       } else {
4976         int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
4977         Address address_high =
4978             CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t));
4979         codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
4980       }
4981       break;
4982     }
4983 
4984     case DataType::Type::kFloat32: {
4985       uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
4986       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
4987       if (value.IsFpuRegister()) {
4988         __ movss(address, value.AsFpuRegister<XmmRegister>());
4989       } else {
4990         DCHECK(value.IsConstant());
4991         int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
4992         __ movl(address, Immediate(v));
4993       }
4994       codegen_->MaybeRecordImplicitNullCheck(instruction);
4995       break;
4996     }
4997 
4998     case DataType::Type::kFloat64: {
4999       uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
5000       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset);
5001       if (value.IsFpuRegister()) {
5002         __ movsd(address, value.AsFpuRegister<XmmRegister>());
5003         codegen_->MaybeRecordImplicitNullCheck(instruction);
5004       } else {
5005         int64_t v =
5006             bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
5007         Address address_high =
5008             CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t));
5009         codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
5010       }
5011       break;
5012     }
5013 
5014     case DataType::Type::kUint32:
5015     case DataType::Type::kUint64:
5016     case DataType::Type::kVoid:
5017       LOG(FATAL) << "Unreachable type " << instruction->GetType();
5018       UNREACHABLE();
5019   }
5020 }
5021 
5022 void LocationsBuilderX86_64::VisitArrayLength(HArrayLength* instruction) {
5023   LocationSummary* locations =
5024       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5025   locations->SetInAt(0, Location::RequiresRegister());
5026   if (!instruction->IsEmittedAtUseSite()) {
5027     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5028   }
5029 }
5030 
5031 void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction) {
5032   if (instruction->IsEmittedAtUseSite()) {
5033     return;
5034   }
5035 
5036   LocationSummary* locations = instruction->GetLocations();
5037   uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
5038   CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
5039   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
5040   __ movl(out, Address(obj, offset));
5041   codegen_->MaybeRecordImplicitNullCheck(instruction);
5042   // With string compression, a String's count field is (length << 1) | compression flag; shift out the flag bit to get the length.
5043   if (mirror::kUseStringCompression && instruction->IsStringLength()) {
5044     __ shrl(out, Immediate(1));
5045   }
5046 }
5047 
5048 void LocationsBuilderX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
5049   RegisterSet caller_saves = RegisterSet::Empty();
5050   InvokeRuntimeCallingConvention calling_convention;
5051   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
5052   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
5053   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
5054   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
5055   HInstruction* length = instruction->InputAt(1);
5056   if (!length->IsEmittedAtUseSite()) {
5057     locations->SetInAt(1, Location::RegisterOrConstant(length));
5058   }
5059 }
5060 
5061 void InstructionCodeGeneratorX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
5062   LocationSummary* locations = instruction->GetLocations();
5063   Location index_loc = locations->InAt(0);
5064   Location length_loc = locations->InAt(1);
5065   SlowPathCode* slow_path =
5066       new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathX86_64(instruction);
5067 
5068   if (length_loc.IsConstant()) {
5069     int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
5070     if (index_loc.IsConstant()) {
5071       // BCE will remove the bounds check if the index is guaranteed to be in range.
5072       int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
5073       if (index < 0 || index >= length) {
5074         codegen_->AddSlowPath(slow_path);
5075         __ jmp(slow_path->GetEntryLabel());
5076       } else {
5077         // Some optimization after BCE may have generated this, and we should not
5078         // generate a bounds check if the constant index is known to be in range.
5079       }
5080       return;
5081     }
5082 
5083     // We have to reverse the jump condition because the length is the constant.
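    // The unsigned comparison below also catches negative indices: they wrap around to
    // large unsigned values, so `kAboveEqual` covers both index < 0 and index >= length.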
5084     CpuRegister index_reg = index_loc.AsRegister<CpuRegister>();
5085     __ cmpl(index_reg, Immediate(length));
5086     codegen_->AddSlowPath(slow_path);
5087     __ j(kAboveEqual, slow_path->GetEntryLabel());
5088   } else {
5089     HInstruction* array_length = instruction->InputAt(1);
5090     if (array_length->IsEmittedAtUseSite()) {
5091       // Address the length field in the array.
5092       DCHECK(array_length->IsArrayLength());
5093       uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
5094       Location array_loc = array_length->GetLocations()->InAt(0);
5095       Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
5096       if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
5097         // TODO: if index_loc.IsConstant(), compare twice the index (to compensate for
5098         // the string compression flag) with the in-memory length and avoid the temporary.
5099         CpuRegister length_reg = CpuRegister(TMP);
5100         __ movl(length_reg, array_len);
5101         codegen_->MaybeRecordImplicitNullCheck(array_length);
5102         __ shrl(length_reg, Immediate(1));
5103         codegen_->GenerateIntCompare(length_reg, index_loc);
5104       } else {
5105         // Check the bound for the general case:
5106         // a regular array, or a String's char array when compression is disabled.
5107         if (index_loc.IsConstant()) {
5108           int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
5109           __ cmpl(array_len, Immediate(value));
5110         } else {
5111           __ cmpl(array_len, index_loc.AsRegister<CpuRegister>());
5112         }
5113         codegen_->MaybeRecordImplicitNullCheck(array_length);
5114       }
5115     } else {
5116       codegen_->GenerateIntCompare(length_loc, index_loc);
5117     }
5118     codegen_->AddSlowPath(slow_path);
5119     __ j(kBelowEqual, slow_path->GetEntryLabel());
5120   }
5121 }
5122 
5123 void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp,
5124                                      CpuRegister card,
5125                                      CpuRegister object,
5126                                      CpuRegister value,
5127                                      bool value_can_be_null) {
5128   NearLabel is_null;
5129   if (value_can_be_null) {
5130     __ testl(value, value);
5131     __ j(kEqual, &is_null);
5132   }
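  // Load the card table base (biased so that its least significant byte equals the
  // dirty-card value) and dirty the card covering `object`: the card index is
  // object >> kCardShift, and storing the low byte of `card` writes that dirty value.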
5133   __ gs()->movq(card, Address::Absolute(Thread::CardTableOffset<kX86_64PointerSize>().Int32Value(),
5134                                         /* no_rip */ true));
5135   __ movq(temp, object);
5136   __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift));
5137   __ movb(Address(temp, card, TIMES_1, 0), card);
5138   if (value_can_be_null) {
5139     __ Bind(&is_null);
5140   }
5141 }
5142 
5143 void LocationsBuilderX86_64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
5144   LOG(FATAL) << "Unimplemented";
5145 }
5146 
5147 void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instruction) {
5148   if (instruction->GetNext()->IsSuspendCheck() &&
5149       instruction->GetBlock()->GetLoopInformation() != nullptr) {
5150     HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
5151     // The back edge will generate the suspend check.
5152     codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
5153   }
5154 
5155   codegen_->GetMoveResolver()->EmitNativeCode(instruction);
5156 }
5157 
5158 void LocationsBuilderX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
5159   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5160       instruction, LocationSummary::kCallOnSlowPath);
5161   // In suspend check slow path, usually there are no caller-save registers at all.
5162   // If SIMD instructions are present, however, we force spilling all live SIMD
5163   // registers in full width (since the runtime only saves/restores lower part).
5164   locations->SetCustomSlowPathCallerSaves(
5165       GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
5166 }
5167 
5168 void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
5169   HBasicBlock* block = instruction->GetBlock();
5170   if (block->GetLoopInformation() != nullptr) {
5171     DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
5172     // The back edge will generate the suspend check.
5173     return;
5174   }
5175   if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
5176     // The goto will generate the suspend check.
5177     return;
5178   }
5179   GenerateSuspendCheck(instruction, nullptr);
5180 }
5181 
5182 void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruction,
5183                                                           HBasicBlock* successor) {
5184   SuspendCheckSlowPathX86_64* slow_path =
5185       down_cast<SuspendCheckSlowPathX86_64*>(instruction->GetSlowPath());
5186   if (slow_path == nullptr) {
5187     slow_path =
5188         new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathX86_64(instruction, successor);
5189     instruction->SetSlowPath(slow_path);
5190     codegen_->AddSlowPath(slow_path);
5191     if (successor != nullptr) {
5192       DCHECK(successor->IsLoopHeader());
5193     }
5194   } else {
5195     DCHECK_EQ(slow_path->GetSuccessor(), successor);
5196   }
5197 
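  // Check the 16-bit flags half of the thread's state_and_flags word (reached through
  // the GS-based Thread pointer); any non-zero flag means a suspend or checkpoint
  // request is pending.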
5198   __ gs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64PointerSize>().Int32Value(),
5199                                   /* no_rip */ true),
5200                 Immediate(0));
5201   if (successor == nullptr) {
5202     __ j(kNotEqual, slow_path->GetEntryLabel());
5203     __ Bind(slow_path->GetReturnLabel());
5204   } else {
5205     __ j(kEqual, codegen_->GetLabelOf(successor));
5206     __ jmp(slow_path->GetEntryLabel());
5207   }
5208 }
5209 
5210 X86_64Assembler* ParallelMoveResolverX86_64::GetAssembler() const {
5211   return codegen_->GetAssembler();
5212 }
5213 
5214 void ParallelMoveResolverX86_64::EmitMove(size_t index) {
5215   MoveOperands* move = moves_[index];
5216   Location source = move->GetSource();
5217   Location destination = move->GetDestination();
5218 
5219   if (source.IsRegister()) {
5220     if (destination.IsRegister()) {
5221       __ movq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
5222     } else if (destination.IsStackSlot()) {
5223       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
5224               source.AsRegister<CpuRegister>());
5225     } else {
5226       DCHECK(destination.IsDoubleStackSlot());
5227       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
5228               source.AsRegister<CpuRegister>());
5229     }
5230   } else if (source.IsStackSlot()) {
5231     if (destination.IsRegister()) {
5232       __ movl(destination.AsRegister<CpuRegister>(),
5233               Address(CpuRegister(RSP), source.GetStackIndex()));
5234     } else if (destination.IsFpuRegister()) {
5235       __ movss(destination.AsFpuRegister<XmmRegister>(),
5236               Address(CpuRegister(RSP), source.GetStackIndex()));
5237     } else {
5238       DCHECK(destination.IsStackSlot());
5239       __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
5240       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
5241     }
5242   } else if (source.IsDoubleStackSlot()) {
5243     if (destination.IsRegister()) {
5244       __ movq(destination.AsRegister<CpuRegister>(),
5245               Address(CpuRegister(RSP), source.GetStackIndex()));
5246     } else if (destination.IsFpuRegister()) {
5247       __ movsd(destination.AsFpuRegister<XmmRegister>(),
5248                Address(CpuRegister(RSP), source.GetStackIndex()));
5249     } else {
5250       DCHECK(destination.IsDoubleStackSlot()) << destination;
5251       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
5252       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
5253     }
5254   } else if (source.IsSIMDStackSlot()) {
5255     if (destination.IsFpuRegister()) {
5256       __ movups(destination.AsFpuRegister<XmmRegister>(),
5257                 Address(CpuRegister(RSP), source.GetStackIndex()));
5258     } else {
5259       DCHECK(destination.IsSIMDStackSlot());
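      // Copy the 128-bit slot as two 64-bit halves through TMP; `high` is the offset of
      // the upper half.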
5260       size_t high = kX86_64WordSize;
5261       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
5262       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
5263       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex() + high));
5264       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex() + high), CpuRegister(TMP));
5265     }
5266   } else if (source.IsConstant()) {
5267     HConstant* constant = source.GetConstant();
5268     if (constant->IsIntConstant() || constant->IsNullConstant()) {
5269       int32_t value = CodeGenerator::GetInt32ValueOf(constant);
5270       if (destination.IsRegister()) {
5271         if (value == 0) {
5272           __ xorl(destination.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
5273         } else {
5274           __ movl(destination.AsRegister<CpuRegister>(), Immediate(value));
5275         }
5276       } else {
5277         DCHECK(destination.IsStackSlot()) << destination;
5278         __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
5279       }
5280     } else if (constant->IsLongConstant()) {
5281       int64_t value = constant->AsLongConstant()->GetValue();
5282       if (destination.IsRegister()) {
5283         codegen_->Load64BitValue(destination.AsRegister<CpuRegister>(), value);
5284       } else {
5285         DCHECK(destination.IsDoubleStackSlot()) << destination;
5286         codegen_->Store64BitValueToStack(destination, value);
5287       }
5288     } else if (constant->IsFloatConstant()) {
5289       float fp_value = constant->AsFloatConstant()->GetValue();
5290       if (destination.IsFpuRegister()) {
5291         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
5292         codegen_->Load32BitValue(dest, fp_value);
5293       } else {
5294         DCHECK(destination.IsStackSlot()) << destination;
5295         Immediate imm(bit_cast<int32_t, float>(fp_value));
5296         __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
5297       }
5298     } else {
5299       DCHECK(constant->IsDoubleConstant()) << constant->DebugName();
5300       double fp_value =  constant->AsDoubleConstant()->GetValue();
5301       int64_t value = bit_cast<int64_t, double>(fp_value);
5302       if (destination.IsFpuRegister()) {
5303         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
5304         codegen_->Load64BitValue(dest, fp_value);
5305       } else {
5306         DCHECK(destination.IsDoubleStackSlot()) << destination;
5307         codegen_->Store64BitValueToStack(destination, value);
5308       }
5309     }
5310   } else if (source.IsFpuRegister()) {
5311     if (destination.IsFpuRegister()) {
5312       __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
5313     } else if (destination.IsStackSlot()) {
5314       __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
5315                source.AsFpuRegister<XmmRegister>());
5316     } else if (destination.IsDoubleStackSlot()) {
5317       __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
5318                source.AsFpuRegister<XmmRegister>());
5319     } else {
5320       DCHECK(destination.IsSIMDStackSlot());
5321       __ movups(Address(CpuRegister(RSP), destination.GetStackIndex()),
5322                 source.AsFpuRegister<XmmRegister>());
5323     }
5324   }
5325 }
5326 
5327 void ParallelMoveResolverX86_64::Exchange32(CpuRegister reg, int mem) {
5328   __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5329   __ movl(Address(CpuRegister(RSP), mem), reg);
5330   __ movl(reg, CpuRegister(TMP));
5331 }
5332 
5333 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg1, CpuRegister reg2) {
5334   __ movq(CpuRegister(TMP), reg1);
5335   __ movq(reg1, reg2);
5336   __ movq(reg2, CpuRegister(TMP));
5337 }
5338 
5339 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) {
5340   __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5341   __ movq(Address(CpuRegister(RSP), mem), reg);
5342   __ movq(reg, CpuRegister(TMP));
5343 }
5344 
5345 void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) {
5346   __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5347   __ movss(Address(CpuRegister(RSP), mem), reg);
5348   __ movd(reg, CpuRegister(TMP));
5349 }
5350 
5351 void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) {
5352   __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5353   __ movsd(Address(CpuRegister(RSP), mem), reg);
5354   __ movd(reg, CpuRegister(TMP));
5355 }
5356 
5357 void ParallelMoveResolverX86_64::Exchange128(XmmRegister reg, int mem) {
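  // Spill the 128-bit register to a fresh stack slot, swap that slot with the target
  // memory (whose RSP-relative offset grows by `extra_slot` after adjusting RSP), then
  // reload the register.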
5358   size_t extra_slot = 2 * kX86_64WordSize;
5359   __ subq(CpuRegister(RSP), Immediate(extra_slot));
5360   __ movups(Address(CpuRegister(RSP), 0), XmmRegister(reg));
5361   ExchangeMemory64(0, mem + extra_slot, 2);
5362   __ movups(XmmRegister(reg), Address(CpuRegister(RSP), 0));
5363   __ addq(CpuRegister(RSP), Immediate(extra_slot));
5364 }
5365 
5366 void ParallelMoveResolverX86_64::ExchangeMemory32(int mem1, int mem2) {
5367   ScratchRegisterScope ensure_scratch(
5368       this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
5369 
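  // If the scratch register had to be spilled (pushed), RSP moved down by one word, so
  // the incoming stack offsets must be adjusted accordingly.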
5370   int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
5371   __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
5372   __ movl(CpuRegister(ensure_scratch.GetRegister()),
5373           Address(CpuRegister(RSP), mem2 + stack_offset));
5374   __ movl(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
5375   __ movl(Address(CpuRegister(RSP), mem1 + stack_offset),
5376           CpuRegister(ensure_scratch.GetRegister()));
5377 }
5378 
5379 void ParallelMoveResolverX86_64::ExchangeMemory64(int mem1, int mem2, int num_of_qwords) {
5380   ScratchRegisterScope ensure_scratch(
5381       this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
5382 
5383   int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
5384 
5385   // Now that temp registers are available (possibly spilled), exchange blocks of memory.
5386   for (int i = 0; i < num_of_qwords; i++) {
5387     __ movq(CpuRegister(TMP),
5388             Address(CpuRegister(RSP), mem1 + stack_offset));
5389     __ movq(CpuRegister(ensure_scratch.GetRegister()),
5390             Address(CpuRegister(RSP), mem2 + stack_offset));
5391     __ movq(Address(CpuRegister(RSP), mem2 + stack_offset),
5392             CpuRegister(TMP));
5393     __ movq(Address(CpuRegister(RSP), mem1 + stack_offset),
5394             CpuRegister(ensure_scratch.GetRegister()));
5395     stack_offset += kX86_64WordSize;
5396   }
5397 }
5398 
5399 void ParallelMoveResolverX86_64::EmitSwap(size_t index) {
5400   MoveOperands* move = moves_[index];
5401   Location source = move->GetSource();
5402   Location destination = move->GetDestination();
5403 
5404   if (source.IsRegister() && destination.IsRegister()) {
5405     Exchange64(source.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
5406   } else if (source.IsRegister() && destination.IsStackSlot()) {
5407     Exchange32(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
5408   } else if (source.IsStackSlot() && destination.IsRegister()) {
5409     Exchange32(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
5410   } else if (source.IsStackSlot() && destination.IsStackSlot()) {
5411     ExchangeMemory32(destination.GetStackIndex(), source.GetStackIndex());
5412   } else if (source.IsRegister() && destination.IsDoubleStackSlot()) {
5413     Exchange64(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
5414   } else if (source.IsDoubleStackSlot() && destination.IsRegister()) {
5415     Exchange64(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
5416   } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
5417     ExchangeMemory64(destination.GetStackIndex(), source.GetStackIndex(), 1);
5418   } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
5419     __ movd(CpuRegister(TMP), source.AsFpuRegister<XmmRegister>());
5420     __ movaps(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>());
5421     __ movd(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP));
5422   } else if (source.IsFpuRegister() && destination.IsStackSlot()) {
5423     Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
5424   } else if (source.IsStackSlot() && destination.IsFpuRegister()) {
5425     Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
5426   } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) {
5427     Exchange64(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
5428   } else if (source.IsDoubleStackSlot() && destination.IsFpuRegister()) {
5429     Exchange64(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
5430   } else if (source.IsSIMDStackSlot() && destination.IsSIMDStackSlot()) {
5431     ExchangeMemory64(destination.GetStackIndex(), source.GetStackIndex(), 2);
5432   } else if (source.IsFpuRegister() && destination.IsSIMDStackSlot()) {
5433     Exchange128(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
5434   } else if (destination.IsFpuRegister() && source.IsSIMDStackSlot()) {
5435     Exchange128(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
5436   } else {
5437     LOG(FATAL) << "Unimplemented swap between " << source << " and " << destination;
5438   }
5439 }
5440 
5441 
5442 void ParallelMoveResolverX86_64::SpillScratch(int reg) {
5443   __ pushq(CpuRegister(reg));
5444 }
5445 
5446 
5447 void ParallelMoveResolverX86_64::RestoreScratch(int reg) {
5448   __ popq(CpuRegister(reg));
5449 }
5450 
5451 void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck(
5452     SlowPathCode* slow_path, CpuRegister class_reg) {
5453   constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
5454   const size_t status_byte_offset =
5455       mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
5456   constexpr uint32_t shifted_initialized_value =
5457       enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte);
5458 
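  // The class status bits live above the SubtypeCheck bitstring within the 32-bit
  // status_ field; comparing only the byte that holds them works because ClassStatus
  // values are ordered, so anything below the shifted kInitialized value is not yet
  // initialized and must take the slow path.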
5459   __ cmpb(Address(class_reg,  status_byte_offset), Immediate(shifted_initialized_value));
5460   __ j(kBelow, slow_path->GetEntryLabel());
5461   __ Bind(slow_path->GetExitLabel());
5462   // No need for memory fence, thanks to the x86-64 memory model.
5463 }
5464 
5465 HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind(
5466     HLoadClass::LoadKind desired_class_load_kind) {
5467   switch (desired_class_load_kind) {
5468     case HLoadClass::LoadKind::kInvalid:
5469       LOG(FATAL) << "UNREACHABLE";
5470       UNREACHABLE();
5471     case HLoadClass::LoadKind::kReferrersClass:
5472       break;
5473     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
5474     case HLoadClass::LoadKind::kBootImageClassTable:
5475     case HLoadClass::LoadKind::kBssEntry:
5476       DCHECK(!Runtime::Current()->UseJitCompilation());
5477       break;
5478     case HLoadClass::LoadKind::kJitTableAddress:
5479       DCHECK(Runtime::Current()->UseJitCompilation());
5480       break;
5481     case HLoadClass::LoadKind::kBootImageAddress:
5482     case HLoadClass::LoadKind::kRuntimeCall:
5483       break;
5484   }
5485   return desired_class_load_kind;
5486 }
5487 
5488 void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
5489   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
5490   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
5491     // Custom calling convention: RAX serves as both input and output.
5492     CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
5493         cls,
5494         Location::RegisterLocation(RAX),
5495         Location::RegisterLocation(RAX));
5496     return;
5497   }
5498   DCHECK(!cls->NeedsAccessCheck());
5499 
5500   const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
5501   LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
5502       ? LocationSummary::kCallOnSlowPath
5503       : LocationSummary::kNoCall;
5504   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind);
5505   if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
5506     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
5507   }
5508 
5509   if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
5510     locations->SetInAt(0, Location::RequiresRegister());
5511   }
5512   locations->SetOut(Location::RequiresRegister());
5513   if (load_kind == HLoadClass::LoadKind::kBssEntry) {
5514     if (!kUseReadBarrier || kUseBakerReadBarrier) {
5515       // Rely on the type resolution and/or initialization to save everything.
5516       // Custom calling convention: RAX serves as both input and output.
5517       RegisterSet caller_saves = RegisterSet::Empty();
5518       caller_saves.Add(Location::RegisterLocation(RAX));
5519       locations->SetCustomSlowPathCallerSaves(caller_saves);
5520     } else {
5521       // For non-Baker read barrier we have a temp-clobbering call.
5522     }
5523   }
5524 }
5525 
5526 Label* CodeGeneratorX86_64::NewJitRootClassPatch(const DexFile& dex_file,
5527                                                  dex::TypeIndex type_index,
5528                                                  Handle<mirror::Class> handle) {
5529   ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
5530   // Add a patch entry and return the label.
5531   jit_class_patches_.emplace_back(&dex_file, type_index.index_);
5532   PatchInfo<Label>* info = &jit_class_patches_.back();
5533   return &info->label;
5534 }
5535 
5536 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
5537 // move.
5538 void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
5539   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
5540   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
5541     codegen_->GenerateLoadClassRuntimeCall(cls);
5542     return;
5543   }
5544   DCHECK(!cls->NeedsAccessCheck());
5545 
5546   LocationSummary* locations = cls->GetLocations();
5547   Location out_loc = locations->Out();
5548   CpuRegister out = out_loc.AsRegister<CpuRegister>();
5549 
5550   const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
5551       ? kWithoutReadBarrier
5552       : kCompilerReadBarrierOption;
5553   bool generate_null_check = false;
5554   switch (load_kind) {
5555     case HLoadClass::LoadKind::kReferrersClass: {
5556       DCHECK(!cls->CanCallRuntime());
5557       DCHECK(!cls->MustGenerateClinitCheck());
5558       // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
5559       CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
5560       GenerateGcRootFieldLoad(
5561           cls,
5562           out_loc,
5563           Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()),
5564           /* fixup_label */ nullptr,
5565           read_barrier_option);
5566       break;
5567     }
5568     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
5569       DCHECK(codegen_->GetCompilerOptions().IsBootImage());
5570       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
5571       __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false));
5572       codegen_->RecordBootImageTypePatch(cls);
5573       break;
5574     case HLoadClass::LoadKind::kBootImageAddress: {
5575       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
5576       uint32_t address = dchecked_integral_cast<uint32_t>(
5577           reinterpret_cast<uintptr_t>(cls->GetClass().Get()));
5578       DCHECK_NE(address, 0u);
5579       __ movl(out, Immediate(static_cast<int32_t>(address)));  // Zero-extended.
5580       break;
5581     }
5582     case HLoadClass::LoadKind::kBootImageClassTable: {
5583       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
5584       __ movl(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false));
5585       codegen_->RecordBootImageTypePatch(cls);
5586       // Extract the reference from the slot data, i.e. clear the hash bits.
5587       int32_t masked_hash = ClassTable::TableSlot::MaskHash(
5588           ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex())));
5589       if (masked_hash != 0) {
5590         __ subl(out, Immediate(masked_hash));
5591       }
5592       break;
5593     }
5594     case HLoadClass::LoadKind::kBssEntry: {
5595       Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
5596                                           /* no_rip */ false);
5597       Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls);
5598       // /* GcRoot<mirror::Class> */ out = *address  /* PC-relative */
5599       GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
5600       generate_null_check = true;
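      // The .bss entry is null until the class has been resolved; the null check emitted
      // at the end of this visitor branches to the slow path, which resolves the type and
      // updates the entry.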
5601       break;
5602     }
5603     case HLoadClass::LoadKind::kJitTableAddress: {
5604       Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
5605                                           /* no_rip */ true);
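      // kDummy32BitOffset is only a placeholder; the label returned by NewJitRootClassPatch
      // is used later to patch in the address of this class's GC root in the JIT roots table.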
5606       Label* fixup_label =
5607           codegen_->NewJitRootClassPatch(cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass());
5608       // /* GcRoot<mirror::Class> */ out = *address
5609       GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
5610       break;
5611     }
5612     default:
5613       LOG(FATAL) << "Unexpected load kind: " << cls->GetLoadKind();
5614       UNREACHABLE();
5615   }
5616 
5617   if (generate_null_check || cls->MustGenerateClinitCheck()) {
5618     DCHECK(cls->CanCallRuntime());
5619     SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64(
5620         cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
5621     codegen_->AddSlowPath(slow_path);
5622     if (generate_null_check) {
5623       __ testl(out, out);
5624       __ j(kEqual, slow_path->GetEntryLabel());
5625     }
5626     if (cls->MustGenerateClinitCheck()) {
5627       GenerateClassInitializationCheck(slow_path, out);
5628     } else {
5629       __ Bind(slow_path->GetExitLabel());
5630     }
5631   }
5632 }
5633 
5634 void LocationsBuilderX86_64::VisitClinitCheck(HClinitCheck* check) {
5635   LocationSummary* locations =
5636       new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
5637   locations->SetInAt(0, Location::RequiresRegister());
5638   if (check->HasUses()) {
5639     locations->SetOut(Location::SameAsFirstInput());
5640   }
5641 }
5642 
5643 void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) {
5644   // We assume the class is not null.
5645   SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64(
5646       check->GetLoadClass(), check, check->GetDexPc(), true);
5647   codegen_->AddSlowPath(slow_path);
5648   GenerateClassInitializationCheck(slow_path,
5649                                    check->GetLocations()->InAt(0).AsRegister<CpuRegister>());
5650 }
5651 
5652 HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind(
5653     HLoadString::LoadKind desired_string_load_kind) {
5654   switch (desired_string_load_kind) {
5655     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
5656     case HLoadString::LoadKind::kBootImageInternTable:
5657     case HLoadString::LoadKind::kBssEntry:
5658       DCHECK(!Runtime::Current()->UseJitCompilation());
5659       break;
5660     case HLoadString::LoadKind::kJitTableAddress:
5661       DCHECK(Runtime::Current()->UseJitCompilation());
5662       break;
5663     case HLoadString::LoadKind::kBootImageAddress:
5664     case HLoadString::LoadKind::kRuntimeCall:
5665       break;
5666   }
5667   return desired_string_load_kind;
5668 }
5669 
5670 void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) {
5671   LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
5672   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
5673   if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) {
5674     locations->SetOut(Location::RegisterLocation(RAX));
5675   } else {
5676     locations->SetOut(Location::RequiresRegister());
5677     if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
5678       if (!kUseReadBarrier || kUseBakerReadBarrier) {
5679         // Rely on the pResolveString to save everything.
5680         // Custom calling convention: RAX serves as both input and output.
5681         RegisterSet caller_saves = RegisterSet::Empty();
5682         caller_saves.Add(Location::RegisterLocation(RAX));
5683         locations->SetCustomSlowPathCallerSaves(caller_saves);
5684       } else {
5685         // For non-Baker read barrier we have a temp-clobbering call.
5686       }
5687     }
5688   }
5689 }
5690 
5691 Label* CodeGeneratorX86_64::NewJitRootStringPatch(const DexFile& dex_file,
5692                                                   dex::StringIndex string_index,
5693                                                   Handle<mirror::String> handle) {
5694   ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
5695   // Add a patch entry and return the label.
5696   jit_string_patches_.emplace_back(&dex_file, string_index.index_);
5697   PatchInfo<Label>* info = &jit_string_patches_.back();
5698   return &info->label;
5699 }
5700 
5701 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
5702 // move.
5703 void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
5704   LocationSummary* locations = load->GetLocations();
5705   Location out_loc = locations->Out();
5706   CpuRegister out = out_loc.AsRegister<CpuRegister>();
5707 
5708   switch (load->GetLoadKind()) {
5709     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
5710       DCHECK(codegen_->GetCompilerOptions().IsBootImage());
5711       __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false));
5712       codegen_->RecordBootImageStringPatch(load);
5713       return;
5714     }
5715     case HLoadString::LoadKind::kBootImageAddress: {
5716       uint32_t address = dchecked_integral_cast<uint32_t>(
5717           reinterpret_cast<uintptr_t>(load->GetString().Get()));
5718       DCHECK_NE(address, 0u);
5719       __ movl(out, Immediate(static_cast<int32_t>(address)));  // Zero-extended.
5720       return;
5721     }
5722     case HLoadString::LoadKind::kBootImageInternTable: {
5723       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
5724       __ movl(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false));
5725       codegen_->RecordBootImageStringPatch(load);
5726       return;
5727     }
5728     case HLoadString::LoadKind::kBssEntry: {
5729       Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
5730                                           /* no_rip */ false);
5731       Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
5732       // /* GcRoot<mirror::Class> */ out = *address  /* PC-relative */
5733       GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
5734       SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86_64(load);
5735       codegen_->AddSlowPath(slow_path);
5736       __ testl(out, out);
5737       __ j(kEqual, slow_path->GetEntryLabel());
5738       __ Bind(slow_path->GetExitLabel());
5739       return;
5740     }
5741     case HLoadString::LoadKind::kJitTableAddress: {
5742       Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
5743                                           /* no_rip */ true);
5744       Label* fixup_label = codegen_->NewJitRootStringPatch(
5745           load->GetDexFile(), load->GetStringIndex(), load->GetString());
5746       // /* GcRoot<mirror::String> */ out = *address
5747       GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
5748       return;
5749     }
5750     default:
5751       break;
5752   }
5753 
5754   // TODO: Re-add the compiler code to do string dex cache lookup again.
5755   // Custom calling convention: RAX serves as both input and output.
5756   __ movl(CpuRegister(RAX), Immediate(load->GetStringIndex().index_));
5757   codegen_->InvokeRuntime(kQuickResolveString,
5758                           load,
5759                           load->GetDexPc());
5760   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
5761 }
5762 
5763 static Address GetExceptionTlsAddress() {
5764   return Address::Absolute(Thread::ExceptionOffset<kX86_64PointerSize>().Int32Value(),
5765                            /* no_rip */ true);
5766 }
5767 
5768 void LocationsBuilderX86_64::VisitLoadException(HLoadException* load) {
5769   LocationSummary* locations =
5770       new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall);
5771   locations->SetOut(Location::RequiresRegister());
5772 }
5773 
5774 void InstructionCodeGeneratorX86_64::VisitLoadException(HLoadException* load) {
5775   __ gs()->movl(load->GetLocations()->Out().AsRegister<CpuRegister>(), GetExceptionTlsAddress());
5776 }
5777 
5778 void LocationsBuilderX86_64::VisitClearException(HClearException* clear) {
5779   new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
5780 }
5781 
5782 void InstructionCodeGeneratorX86_64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
5783   __ gs()->movl(GetExceptionTlsAddress(), Immediate(0));
5784 }
5785 
5786 void LocationsBuilderX86_64::VisitThrow(HThrow* instruction) {
5787   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5788       instruction, LocationSummary::kCallOnMainOnly);
5789   InvokeRuntimeCallingConvention calling_convention;
5790   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
5791 }
5792 
5793 void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) {
5794   codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
5795   CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
5796 }
5797 
5798 static bool CheckCastTypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
5799   if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
5800     // We need a temporary for holding the iftable length.
5801     return true;
5802   }
5803   return kEmitCompilerReadBarrier &&
5804       !kUseBakerReadBarrier &&
5805       (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
5806        type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
5807        type_check_kind == TypeCheckKind::kArrayObjectCheck);
5808 }
5809 
5810 static bool InstanceOfTypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
5811   return kEmitCompilerReadBarrier &&
5812       !kUseBakerReadBarrier &&
5813       (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
5814        type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
5815        type_check_kind == TypeCheckKind::kArrayObjectCheck);
5816 }
5817 
5818 void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
5819   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
5820   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
5821   bool baker_read_barrier_slow_path = false;
5822   switch (type_check_kind) {
5823     case TypeCheckKind::kExactCheck:
5824     case TypeCheckKind::kAbstractClassCheck:
5825     case TypeCheckKind::kClassHierarchyCheck:
5826     case TypeCheckKind::kArrayObjectCheck: {
5827       bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction);
5828       call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
5829       baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier;
5830       break;
5831     }
5832     case TypeCheckKind::kArrayCheck:
5833     case TypeCheckKind::kUnresolvedCheck:
5834     case TypeCheckKind::kInterfaceCheck:
5835       call_kind = LocationSummary::kCallOnSlowPath;
5836       break;
5837   }
5838 
5839   LocationSummary* locations =
5840       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
5841   if (baker_read_barrier_slow_path) {
5842     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
5843   }
5844   locations->SetInAt(0, Location::RequiresRegister());
5845   locations->SetInAt(1, Location::Any());
5846   // Note that TypeCheckSlowPathX86_64 uses this "out" register too.
5847   locations->SetOut(Location::RequiresRegister());
5848   // When read barriers are enabled, we need a temporary register for
5849   // some cases.
5850   if (InstanceOfTypeCheckNeedsATemporary(type_check_kind)) {
5851     locations->AddTemp(Location::RequiresRegister());
5852   }
5853 }
5854 
5855 void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
5856   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
5857   LocationSummary* locations = instruction->GetLocations();
5858   Location obj_loc = locations->InAt(0);
5859   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
5860   Location cls = locations->InAt(1);
5861   Location out_loc =  locations->Out();
5862   CpuRegister out = out_loc.AsRegister<CpuRegister>();
5863   Location maybe_temp_loc = InstanceOfTypeCheckNeedsATemporary(type_check_kind) ?
5864       locations->GetTemp(0) :
5865       Location::NoLocation();
5866   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
5867   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
5868   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
5869   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
5870   SlowPathCode* slow_path = nullptr;
5871   NearLabel done, zero;
5872 
5873   // Return 0 if `obj` is null.
5874   // Avoid null check if we know obj is not null.
5875   if (instruction->MustDoNullCheck()) {
5876     __ testl(obj, obj);
5877     __ j(kEqual, &zero);
5878   }
5879 
5880   switch (type_check_kind) {
5881     case TypeCheckKind::kExactCheck: {
5882       ReadBarrierOption read_barrier_option =
5883           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
5884       // /* HeapReference<Class> */ out = obj->klass_
5885       GenerateReferenceLoadTwoRegisters(instruction,
5886                                         out_loc,
5887                                         obj_loc,
5888                                         class_offset,
5889                                         read_barrier_option);
5890       if (cls.IsRegister()) {
5891         __ cmpl(out, cls.AsRegister<CpuRegister>());
5892       } else {
5893         DCHECK(cls.IsStackSlot()) << cls;
5894         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
5895       }
5896       if (zero.IsLinked()) {
5897         // Classes must be equal for the instanceof to succeed.
5898         __ j(kNotEqual, &zero);
5899         __ movl(out, Immediate(1));
5900         __ jmp(&done);
5901       } else {
5902         __ setcc(kEqual, out);
5903         // setcc only sets the low byte.
5904         __ andl(out, Immediate(1));
5905       }
5906       break;
5907     }
5908 
5909     case TypeCheckKind::kAbstractClassCheck: {
5910       ReadBarrierOption read_barrier_option =
5911           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
5912       // /* HeapReference<Class> */ out = obj->klass_
5913       GenerateReferenceLoadTwoRegisters(instruction,
5914                                         out_loc,
5915                                         obj_loc,
5916                                         class_offset,
5917                                         read_barrier_option);
5918       // If the class is abstract, we eagerly fetch the super class of the
5919       // object to avoid doing a comparison we know will fail.
5920       NearLabel loop, success;
5921       __ Bind(&loop);
5922       // /* HeapReference<Class> */ out = out->super_class_
5923       GenerateReferenceLoadOneRegister(instruction,
5924                                        out_loc,
5925                                        super_offset,
5926                                        maybe_temp_loc,
5927                                        read_barrier_option);
5928       __ testl(out, out);
5929       // If `out` is null, we use it for the result, and jump to `done`.
5930       __ j(kEqual, &done);
5931       if (cls.IsRegister()) {
5932         __ cmpl(out, cls.AsRegister<CpuRegister>());
5933       } else {
5934         DCHECK(cls.IsStackSlot()) << cls;
5935         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
5936       }
5937       __ j(kNotEqual, &loop);
5938       __ movl(out, Immediate(1));
5939       if (zero.IsLinked()) {
5940         __ jmp(&done);
5941       }
5942       break;
5943     }
5944 
5945     case TypeCheckKind::kClassHierarchyCheck: {
5946       ReadBarrierOption read_barrier_option =
5947           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
5948       // /* HeapReference<Class> */ out = obj->klass_
5949       GenerateReferenceLoadTwoRegisters(instruction,
5950                                         out_loc,
5951                                         obj_loc,
5952                                         class_offset,
5953                                         read_barrier_option);
5954       // Walk over the class hierarchy to find a match.
5955       NearLabel loop, success;
5956       __ Bind(&loop);
5957       if (cls.IsRegister()) {
5958         __ cmpl(out, cls.AsRegister<CpuRegister>());
5959       } else {
5960         DCHECK(cls.IsStackSlot()) << cls;
5961         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
5962       }
5963       __ j(kEqual, &success);
5964       // /* HeapReference<Class> */ out = out->super_class_
5965       GenerateReferenceLoadOneRegister(instruction,
5966                                        out_loc,
5967                                        super_offset,
5968                                        maybe_temp_loc,
5969                                        read_barrier_option);
5970       __ testl(out, out);
5971       __ j(kNotEqual, &loop);
5972       // If `out` is null, we use it for the result, and jump to `done`.
5973       __ jmp(&done);
5974       __ Bind(&success);
5975       __ movl(out, Immediate(1));
5976       if (zero.IsLinked()) {
5977         __ jmp(&done);
5978       }
5979       break;
5980     }
5981 
5982     case TypeCheckKind::kArrayObjectCheck: {
5983       ReadBarrierOption read_barrier_option =
5984           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
5985       // /* HeapReference<Class> */ out = obj->klass_
5986       GenerateReferenceLoadTwoRegisters(instruction,
5987                                         out_loc,
5988                                         obj_loc,
5989                                         class_offset,
5990                                         read_barrier_option);
5991       // Do an exact check.
5992       NearLabel exact_check;
5993       if (cls.IsRegister()) {
5994         __ cmpl(out, cls.AsRegister<CpuRegister>());
5995       } else {
5996         DCHECK(cls.IsStackSlot()) << cls;
5997         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
5998       }
5999       __ j(kEqual, &exact_check);
6000       // Otherwise, we need to check that the object's class is a non-primitive array.
6001       // /* HeapReference<Class> */ out = out->component_type_
6002       GenerateReferenceLoadOneRegister(instruction,
6003                                        out_loc,
6004                                        component_offset,
6005                                        maybe_temp_loc,
6006                                        read_barrier_option);
6007       __ testl(out, out);
6008       // If `out` is null, we use it for the result, and jump to `done`.
6009       __ j(kEqual, &done);
6010       __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot));
6011       __ j(kNotEqual, &zero);
6012       __ Bind(&exact_check);
6013       __ movl(out, Immediate(1));
6014       __ jmp(&done);
6015       break;
6016     }
6017 
6018     case TypeCheckKind::kArrayCheck: {
6019       // No read barrier since the slow path will retry upon failure.
6020       // /* HeapReference<Class> */ out = obj->klass_
6021       GenerateReferenceLoadTwoRegisters(instruction,
6022                                         out_loc,
6023                                         obj_loc,
6024                                         class_offset,
6025                                         kWithoutReadBarrier);
6026       if (cls.IsRegister()) {
6027         __ cmpl(out, cls.AsRegister<CpuRegister>());
6028       } else {
6029         DCHECK(cls.IsStackSlot()) << cls;
6030         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
6031       }
6032       DCHECK(locations->OnlyCallsOnSlowPath());
6033       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
6034           instruction, /* is_fatal */ false);
6035       codegen_->AddSlowPath(slow_path);
6036       __ j(kNotEqual, slow_path->GetEntryLabel());
6037       __ movl(out, Immediate(1));
6038       if (zero.IsLinked()) {
6039         __ jmp(&done);
6040       }
6041       break;
6042     }
6043 
6044     case TypeCheckKind::kUnresolvedCheck:
6045     case TypeCheckKind::kInterfaceCheck: {
6046       // Note that we indeed only call on slow path, but we always go
6047       // into the slow path for the unresolved and interface check
6048       // cases.
6049       //
6050       // We cannot directly call the InstanceofNonTrivial runtime
6051       // entry point without resorting to a type checking slow path
6052       // here (i.e. by calling InvokeRuntime directly), as it would
6053       // require to assign fixed registers for the inputs of this
6054       // HInstanceOf instruction (following the runtime calling
6055       // convention), which might be cluttered by the potential first
6056       // read barrier emission at the beginning of this method.
6057       //
6058       // TODO: Introduce a new runtime entry point taking the object
6059       // to test (instead of its class) as argument, and let it deal
6060       // with the read barrier issues. This will let us refactor this
6061       // case of the `switch` code as it was previously (with a direct
6062       // call to the runtime not using a type checking slow path).
6063       // This should also be beneficial for the other cases above.
6064       DCHECK(locations->OnlyCallsOnSlowPath());
6065       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
6066           instruction, /* is_fatal */ false);
6067       codegen_->AddSlowPath(slow_path);
6068       __ jmp(slow_path->GetEntryLabel());
6069       if (zero.IsLinked()) {
6070         __ jmp(&done);
6071       }
6072       break;
6073     }
6074   }
6075 
6076   if (zero.IsLinked()) {
6077     __ Bind(&zero);
6078     __ xorl(out, out);
6079   }
6080 
6081   if (done.IsLinked()) {
6082     __ Bind(&done);
6083   }
6084 
6085   if (slow_path != nullptr) {
6086     __ Bind(slow_path->GetExitLabel());
6087   }
6088 }
6089 
6090 void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) {
6091   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6092   LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction);
6093   LocationSummary* locations =
6094       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
6095   locations->SetInAt(0, Location::RequiresRegister());
6096   if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
6097     // Require a register for the interface check since there is a loop that compares the class to
6098     // a memory address.
6099     locations->SetInAt(1, Location::RequiresRegister());
6100   } else {
6101     locations->SetInAt(1, Location::Any());
6102   }
6103 
6104   // Note that TypeCheckSlowPathX86_64 uses this "temp" register too.
6105   locations->AddTemp(Location::RequiresRegister());
6106   // When read barriers are enabled, we need an additional temporary
6107   // register for some cases.
6108   if (CheckCastTypeCheckNeedsATemporary(type_check_kind)) {
6109     locations->AddTemp(Location::RequiresRegister());
6110   }
6111 }
6112 
6113 void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
6114   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6115   LocationSummary* locations = instruction->GetLocations();
6116   Location obj_loc = locations->InAt(0);
6117   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
6118   Location cls = locations->InAt(1);
6119   Location temp_loc = locations->GetTemp(0);
6120   CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
6121   Location maybe_temp2_loc = CheckCastTypeCheckNeedsATemporary(type_check_kind) ?
6122       locations->GetTemp(1) :
6123       Location::NoLocation();
6124   const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
6125   const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
6126   const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
6127   const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
6128   const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
6129   const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
6130   const uint32_t object_array_data_offset =
6131       mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
6132 
6133   bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction);
6134   SlowPathCode* type_check_slow_path =
6135       new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
6136           instruction, is_type_check_slow_path_fatal);
6137   codegen_->AddSlowPath(type_check_slow_path);
6138 
6139 
6140   NearLabel done;
6141   // Avoid null check if we know obj is not null.
6142   if (instruction->MustDoNullCheck()) {
6143     __ testl(obj, obj);
6144     __ j(kEqual, &done);
6145   }
6146 
6147   switch (type_check_kind) {
6148     case TypeCheckKind::kExactCheck:
6149     case TypeCheckKind::kArrayCheck: {
6150       // /* HeapReference<Class> */ temp = obj->klass_
6151       GenerateReferenceLoadTwoRegisters(instruction,
6152                                         temp_loc,
6153                                         obj_loc,
6154                                         class_offset,
6155                                         kWithoutReadBarrier);
6156       if (cls.IsRegister()) {
6157         __ cmpl(temp, cls.AsRegister<CpuRegister>());
6158       } else {
6159         DCHECK(cls.IsStackSlot()) << cls;
6160         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
6161       }
6162       // Jump to slow path for throwing the exception or doing a
6163       // more involved array check.
6164       __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
6165       break;
6166     }
6167 
6168     case TypeCheckKind::kAbstractClassCheck: {
6169       // /* HeapReference<Class> */ temp = obj->klass_
6170       GenerateReferenceLoadTwoRegisters(instruction,
6171                                         temp_loc,
6172                                         obj_loc,
6173                                         class_offset,
6174                                         kWithoutReadBarrier);
6175       // If the class is abstract, we eagerly fetch the super class of the
6176       // object to avoid doing a comparison we know will fail.
6177       NearLabel loop;
6178       __ Bind(&loop);
6179       // /* HeapReference<Class> */ temp = temp->super_class_
6180       GenerateReferenceLoadOneRegister(instruction,
6181                                        temp_loc,
6182                                        super_offset,
6183                                        maybe_temp2_loc,
6184                                        kWithoutReadBarrier);
6185 
6186       // If the class reference currently in `temp` is null, jump to the slow path to throw the
6187       // exception.
6188       __ testl(temp, temp);
6189       // Otherwise, compare the classes.
6190       __ j(kZero, type_check_slow_path->GetEntryLabel());
6191       if (cls.IsRegister()) {
6192         __ cmpl(temp, cls.AsRegister<CpuRegister>());
6193       } else {
6194         DCHECK(cls.IsStackSlot()) << cls;
6195         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
6196       }
6197       __ j(kNotEqual, &loop);
6198       break;
6199     }
6200 
6201     case TypeCheckKind::kClassHierarchyCheck: {
6202       // /* HeapReference<Class> */ temp = obj->klass_
6203       GenerateReferenceLoadTwoRegisters(instruction,
6204                                         temp_loc,
6205                                         obj_loc,
6206                                         class_offset,
6207                                         kWithoutReadBarrier);
6208       // Walk over the class hierarchy to find a match.
6209       NearLabel loop;
6210       __ Bind(&loop);
6211       if (cls.IsRegister()) {
6212         __ cmpl(temp, cls.AsRegister<CpuRegister>());
6213       } else {
6214         DCHECK(cls.IsStackSlot()) << cls;
6215         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
6216       }
6217       __ j(kEqual, &done);
6218 
6219       // /* HeapReference<Class> */ temp = temp->super_class_
6220       GenerateReferenceLoadOneRegister(instruction,
6221                                        temp_loc,
6222                                        super_offset,
6223                                        maybe_temp2_loc,
6224                                        kWithoutReadBarrier);
6225 
6226       // If the class reference currently in `temp` is not null, jump
6227       // back to the beginning of the loop.
6228       __ testl(temp, temp);
6229       __ j(kNotZero, &loop);
6230       // Otherwise, jump to the slow path to throw the exception.
6231       __ jmp(type_check_slow_path->GetEntryLabel());
6232       break;
6233     }
6234 
6235     case TypeCheckKind::kArrayObjectCheck: {
6236       // /* HeapReference<Class> */ temp = obj->klass_
6237       GenerateReferenceLoadTwoRegisters(instruction,
6238                                         temp_loc,
6239                                         obj_loc,
6240                                         class_offset,
6241                                         kWithoutReadBarrier);
6242       // Do an exact check.
6243       NearLabel check_non_primitive_component_type;
6244       if (cls.IsRegister()) {
6245         __ cmpl(temp, cls.AsRegister<CpuRegister>());
6246       } else {
6247         DCHECK(cls.IsStackSlot()) << cls;
6248         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
6249       }
6250       __ j(kEqual, &done);
6251 
6252       // Otherwise, we need to check that the object's class is a non-primitive array.
6253       // /* HeapReference<Class> */ temp = temp->component_type_
6254       GenerateReferenceLoadOneRegister(instruction,
6255                                        temp_loc,
6256                                        component_offset,
6257                                        maybe_temp2_loc,
6258                                        kWithoutReadBarrier);
6259 
6260       // If the component type is not null (i.e. the object is indeed
6261       // an array), jump to label `check_non_primitive_component_type`
6262       // to further check that this component type is not a primitive
6263       // type.
6264       __ testl(temp, temp);
6265       // Otherwise, jump to the slow path to throw the exception.
6266       __ j(kZero, type_check_slow_path->GetEntryLabel());
6267       __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
6268       __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
6269       break;
6270     }
6271 
6272     case TypeCheckKind::kUnresolvedCheck: {
6273       // We always go into the type check slow path for the unresolved case.
6274       //
6275       // We cannot directly call the CheckCast runtime entry point
6276       // without resorting to a type checking slow path here (i.e. by
6277       // calling InvokeRuntime directly), as that would require
6278       // assigning fixed registers for the inputs of this HCheckCast
6279       // instruction (following the runtime calling convention), which
6280       // might be cluttered by the potential first read barrier
6281       // emission at the beginning of this method.
6282       __ jmp(type_check_slow_path->GetEntryLabel());
6283       break;
6284     }
6285 
6286     case TypeCheckKind::kInterfaceCheck:
6287       // Fast path for the interface check. Try to avoid read barriers to improve the fast path.
6288       // We cannot get false positives by doing this.
6289       // /* HeapReference<Class> */ temp = obj->klass_
6290       GenerateReferenceLoadTwoRegisters(instruction,
6291                                         temp_loc,
6292                                         obj_loc,
6293                                         class_offset,
6294                                         kWithoutReadBarrier);
6295 
6296       // /* HeapReference<Class> */ temp = temp->iftable_
6297       GenerateReferenceLoadTwoRegisters(instruction,
6298                                         temp_loc,
6299                                         temp_loc,
6300                                         iftable_offset,
6301                                         kWithoutReadBarrier);
6302       // Iftable is never null.
6303       __ movl(maybe_temp2_loc.AsRegister<CpuRegister>(), Address(temp, array_length_offset));
6304       // Maybe poison the `cls` for direct comparison with memory.
6305       __ MaybePoisonHeapReference(cls.AsRegister<CpuRegister>());
6306       // Loop through the iftable and check if any class matches.
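      // Illustrative shape of the emitted loop (register names are placeholders;
      // the register allocator picks the actual ones). Each iftable entry is an
      // (interface class, method array) pair, hence the step of 2:
      //   start_loop:
      //     subl $2, count
      //     js   type_check_slow_path        // Ran past the start: no match, throw.
      //     cmpl data[count * 4], cls        // Compare against the interface class.
      //     jne  start_loop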
6307       NearLabel start_loop;
6308       __ Bind(&start_loop);
6309       // Need to subtract first to handle the empty array case.
6310       __ subl(maybe_temp2_loc.AsRegister<CpuRegister>(), Immediate(2));
6311       __ j(kNegative, type_check_slow_path->GetEntryLabel());
6312       // Go to next interface if the classes do not match.
6313       __ cmpl(cls.AsRegister<CpuRegister>(),
6314               CodeGeneratorX86_64::ArrayAddress(temp,
6315                                                 maybe_temp2_loc,
6316                                                 TIMES_4,
6317                                                 object_array_data_offset));
6318       __ j(kNotEqual, &start_loop);  // Fall through (cast succeeds) if the classes match.
6319       // If `cls` was poisoned above, unpoison it.
6320       __ MaybeUnpoisonHeapReference(cls.AsRegister<CpuRegister>());
6321       break;
6322   }
6323 
6324   if (done.IsLinked()) {
6325     __ Bind(&done);
6326   }
6327 
6328   __ Bind(type_check_slow_path->GetExitLabel());
6329 }
6330 
6331 void LocationsBuilderX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
6332   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6333       instruction, LocationSummary::kCallOnMainOnly);
6334   InvokeRuntimeCallingConvention calling_convention;
6335   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6336 }
6337 
6338 void InstructionCodeGeneratorX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
6339   codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
6340                           instruction,
6341                           instruction->GetDexPc());
6342   if (instruction->IsEnter()) {
6343     CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
6344   } else {
6345     CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
6346   }
6347 }
6348 
6349 void LocationsBuilderX86_64::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
6350 void LocationsBuilderX86_64::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); }
6351 void LocationsBuilderX86_64::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); }
6352 
6353 void LocationsBuilderX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
6354   LocationSummary* locations =
6355       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
6356   DCHECK(instruction->GetResultType() == DataType::Type::kInt32
6357          || instruction->GetResultType() == DataType::Type::kInt64);
6358   locations->SetInAt(0, Location::RequiresRegister());
6359   locations->SetInAt(1, Location::Any());
6360   locations->SetOut(Location::SameAsFirstInput());
6361 }
6362 
6363 void InstructionCodeGeneratorX86_64::VisitAnd(HAnd* instruction) {
6364   HandleBitwiseOperation(instruction);
6365 }
6366 
6367 void InstructionCodeGeneratorX86_64::VisitOr(HOr* instruction) {
6368   HandleBitwiseOperation(instruction);
6369 }
6370 
6371 void InstructionCodeGeneratorX86_64::VisitXor(HXor* instruction) {
6372   HandleBitwiseOperation(instruction);
6373 }
6374 
6375 void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
6376   LocationSummary* locations = instruction->GetLocations();
6377   Location first = locations->InAt(0);
6378   Location second = locations->InAt(1);
6379   DCHECK(first.Equals(locations->Out()));
6380 
6381   if (instruction->GetResultType() == DataType::Type::kInt32) {
6382     if (second.IsRegister()) {
6383       if (instruction->IsAnd()) {
6384         __ andl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
6385       } else if (instruction->IsOr()) {
6386         __ orl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
6387       } else {
6388         DCHECK(instruction->IsXor());
6389         __ xorl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
6390       }
6391     } else if (second.IsConstant()) {
6392       Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
6393       if (instruction->IsAnd()) {
6394         __ andl(first.AsRegister<CpuRegister>(), imm);
6395       } else if (instruction->IsOr()) {
6396         __ orl(first.AsRegister<CpuRegister>(), imm);
6397       } else {
6398         DCHECK(instruction->IsXor());
6399         __ xorl(first.AsRegister<CpuRegister>(), imm);
6400       }
6401     } else {
6402       Address address(CpuRegister(RSP), second.GetStackIndex());
6403       if (instruction->IsAnd()) {
6404         __ andl(first.AsRegister<CpuRegister>(), address);
6405       } else if (instruction->IsOr()) {
6406         __ orl(first.AsRegister<CpuRegister>(), address);
6407       } else {
6408         DCHECK(instruction->IsXor());
6409         __ xorl(first.AsRegister<CpuRegister>(), address);
6410       }
6411     }
6412   } else {
6413     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
6414     CpuRegister first_reg = first.AsRegister<CpuRegister>();
6415     bool second_is_constant = false;
6416     int64_t value = 0;
6417     if (second.IsConstant()) {
6418       second_is_constant = true;
6419       value = second.GetConstant()->AsLongConstant()->GetValue();
6420     }
6421     bool is_int32_value = IsInt<32>(value);
6422 
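    // x86-64 ALU instructions only take sign-extended 32-bit immediates, so a
    // 64-bit constant that fits in an int32_t is emitted inline; anything wider
    // is read from the RIP-relative constant area via LiteralInt64Address.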
6423     if (instruction->IsAnd()) {
6424       if (second_is_constant) {
6425         if (is_int32_value) {
6426           __ andq(first_reg, Immediate(static_cast<int32_t>(value)));
6427         } else {
6428           __ andq(first_reg, codegen_->LiteralInt64Address(value));
6429         }
6430       } else if (second.IsDoubleStackSlot()) {
6431         __ andq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
6432       } else {
6433         __ andq(first_reg, second.AsRegister<CpuRegister>());
6434       }
6435     } else if (instruction->IsOr()) {
6436       if (second_is_constant) {
6437         if (is_int32_value) {
6438           __ orq(first_reg, Immediate(static_cast<int32_t>(value)));
6439         } else {
6440           __ orq(first_reg, codegen_->LiteralInt64Address(value));
6441         }
6442       } else if (second.IsDoubleStackSlot()) {
6443         __ orq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
6444       } else {
6445         __ orq(first_reg, second.AsRegister<CpuRegister>());
6446       }
6447     } else {
6448       DCHECK(instruction->IsXor());
6449       if (second_is_constant) {
6450         if (is_int32_value) {
6451           __ xorq(first_reg, Immediate(static_cast<int32_t>(value)));
6452         } else {
6453           __ xorq(first_reg, codegen_->LiteralInt64Address(value));
6454         }
6455       } else if (second.IsDoubleStackSlot()) {
6456         __ xorq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
6457       } else {
6458         __ xorq(first_reg, second.AsRegister<CpuRegister>());
6459       }
6460     }
6461   }
6462 }
6463 
6464 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(
6465     HInstruction* instruction,
6466     Location out,
6467     uint32_t offset,
6468     Location maybe_temp,
6469     ReadBarrierOption read_barrier_option) {
6470   CpuRegister out_reg = out.AsRegister<CpuRegister>();
6471   if (read_barrier_option == kWithReadBarrier) {
6472     CHECK(kEmitCompilerReadBarrier);
6473     if (kUseBakerReadBarrier) {
6474       // Load with fast path based Baker's read barrier.
6475       // /* HeapReference<Object> */ out = *(out + offset)
6476       codegen_->GenerateFieldLoadWithBakerReadBarrier(
6477           instruction, out, out_reg, offset, /* needs_null_check */ false);
6478     } else {
6479       // Load with slow path based read barrier.
6480       // Save the value of `out` into `maybe_temp` before overwriting it
6481       // in the following move operation, as we will need it for the
6482       // read barrier below.
6483       DCHECK(maybe_temp.IsRegister()) << maybe_temp;
6484       __ movl(maybe_temp.AsRegister<CpuRegister>(), out_reg);
6485       // /* HeapReference<Object> */ out = *(out + offset)
6486       __ movl(out_reg, Address(out_reg, offset));
6487       codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
6488     }
6489   } else {
6490     // Plain load with no read barrier.
6491     // /* HeapReference<Object> */ out = *(out + offset)
6492     __ movl(out_reg, Address(out_reg, offset));
6493     __ MaybeUnpoisonHeapReference(out_reg);
6494   }
6495 }
6496 
6497 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(
6498     HInstruction* instruction,
6499     Location out,
6500     Location obj,
6501     uint32_t offset,
6502     ReadBarrierOption read_barrier_option) {
6503   CpuRegister out_reg = out.AsRegister<CpuRegister>();
6504   CpuRegister obj_reg = obj.AsRegister<CpuRegister>();
6505   if (read_barrier_option == kWithReadBarrier) {
6506     CHECK(kEmitCompilerReadBarrier);
6507     if (kUseBakerReadBarrier) {
6508       // Load with fast path based Baker's read barrier.
6509       // /* HeapReference<Object> */ out = *(obj + offset)
6510       codegen_->GenerateFieldLoadWithBakerReadBarrier(
6511           instruction, out, obj_reg, offset, /* needs_null_check */ false);
6512     } else {
6513       // Load with slow path based read barrier.
6514       // /* HeapReference<Object> */ out = *(obj + offset)
6515       __ movl(out_reg, Address(obj_reg, offset));
6516       codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
6517     }
6518   } else {
6519     // Plain load with no read barrier.
6520     // /* HeapReference<Object> */ out = *(obj + offset)
6521     __ movl(out_reg, Address(obj_reg, offset));
6522     __ MaybeUnpoisonHeapReference(out_reg);
6523   }
6524 }
6525 
6526 void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(
6527     HInstruction* instruction,
6528     Location root,
6529     const Address& address,
6530     Label* fixup_label,
6531     ReadBarrierOption read_barrier_option) {
6532   CpuRegister root_reg = root.AsRegister<CpuRegister>();
6533   if (read_barrier_option == kWithReadBarrier) {
6534     DCHECK(kEmitCompilerReadBarrier);
6535     if (kUseBakerReadBarrier) {
6536       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
6537       // Baker's read barriers are used:
6538       //
6539       //   root = obj.field;
6540       //   temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
6541       //   if (temp != null) {
6542       //     root = temp(root)
6543       //   }
6544 
6545       // /* GcRoot<mirror::Object> */ root = *address
6546       __ movl(root_reg, address);
6547       if (fixup_label != nullptr) {
6548         __ Bind(fixup_label);
6549       }
6550       static_assert(
6551           sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
6552           "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
6553           "have different sizes.");
6554       static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
6555                     "art::mirror::CompressedReference<mirror::Object> and int32_t "
6556                     "have different sizes.");
6557 
6558       // Slow path marking the GC root `root`.
6559       SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathX86_64(
6560           instruction, root, /* unpoison_ref_before_marking */ false);
6561       codegen_->AddSlowPath(slow_path);
6562 
6563       // Test the `Thread::Current()->pReadBarrierMarkReg ## root.reg()` entrypoint.
6564       const int32_t entry_point_offset =
6565           Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(root.reg());
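      // There is one such entrypoint per core register, so the slow path can mark
      // the root in place without first shuffling it into a fixed register.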
6566       __ gs()->cmpl(Address::Absolute(entry_point_offset, /* no_rip */ true), Immediate(0));
6567       // The entrypoint is null when the GC is not marking.
6568       __ j(kNotEqual, slow_path->GetEntryLabel());
6569       __ Bind(slow_path->GetExitLabel());
6570     } else {
6571       // GC root loaded through a slow path for read barriers other
6572       // than Baker's.
6573       // /* GcRoot<mirror::Object>* */ root = address
6574       __ leaq(root_reg, address);
6575       if (fixup_label != nullptr) {
6576         __ Bind(fixup_label);
6577       }
6578       // /* mirror::Object* */ root = root->Read()
6579       codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
6580     }
6581   } else {
6582     // Plain GC root load with no read barrier.
6583     // /* GcRoot<mirror::Object> */ root = *address
6584     __ movl(root_reg, address);
6585     if (fixup_label != nullptr) {
6586       __ Bind(fixup_label);
6587     }
6588     // Note that GC roots are not affected by heap poisoning, thus we
6589     // do not have to unpoison `root_reg` here.
6590   }
6591 }
6592 
6593 void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
6594                                                                 Location ref,
6595                                                                 CpuRegister obj,
6596                                                                 uint32_t offset,
6597                                                                 bool needs_null_check) {
6598   DCHECK(kEmitCompilerReadBarrier);
6599   DCHECK(kUseBakerReadBarrier);
6600 
6601   // /* HeapReference<Object> */ ref = *(obj + offset)
6602   Address src(obj, offset);
6603   GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
6604 }
6605 
6606 void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
6607                                                                 Location ref,
6608                                                                 CpuRegister obj,
6609                                                                 uint32_t data_offset,
6610                                                                 Location index,
6611                                                                 bool needs_null_check) {
6612   DCHECK(kEmitCompilerReadBarrier);
6613   DCHECK(kUseBakerReadBarrier);
6614 
6615   static_assert(
6616       sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
6617       "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
6618   // /* HeapReference<Object> */ ref =
6619   //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
6620   Address src = CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset);
6621   GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
6622 }
6623 
6624 void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
6625                                                                     Location ref,
6626                                                                     CpuRegister obj,
6627                                                                     const Address& src,
6628                                                                     bool needs_null_check,
6629                                                                     bool always_update_field,
6630                                                                     CpuRegister* temp1,
6631                                                                     CpuRegister* temp2) {
6632   DCHECK(kEmitCompilerReadBarrier);
6633   DCHECK(kUseBakerReadBarrier);
6634 
6635   // In slow path based read barriers, the read barrier call is
6636   // inserted after the original load. However, in fast path based
6637   // Baker's read barriers, we need to perform the load of
6638   // mirror::Object::monitor_ *before* the original reference load.
6639   // This load-load ordering is required by the read barrier.
6640   // The fast path/slow path (for Baker's algorithm) should look like:
6641   //
6642   //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
6643   //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
6644   //   HeapReference<Object> ref = *src;  // Original reference load.
6645   //   bool is_gray = (rb_state == ReadBarrier::GrayState());
6646   //   if (is_gray) {
6647   //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
6648   //   }
6649   //
6650   // Note: the original implementation in ReadBarrier::Barrier is
6651   // slightly more complex as:
6652   // - it implements the load-load fence using a data dependency on
6653   //   the high-bits of rb_state, which are expected to be all zeroes
6654   //   (we use CodeGeneratorX86_64::GenerateMemoryBarrier instead
6655   //   here, which is a no-op thanks to the x86-64 memory model);
6656   // - it performs additional checks that we do not do here for
6657   //   performance reasons.
6658 
6659   CpuRegister ref_reg = ref.AsRegister<CpuRegister>();
6660   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
6661 
6662   // Given the numeric representation, it's enough to check the low bit of the rb_state.
6663   static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
6664   static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
6665   constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
6666   constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
6667   constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
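  // For example (illustrative values): if LockWord::kReadBarrierStateShift were 28,
  // the gray bit would be bit 4 of byte 3 of the lock word and test_value would be 0x10.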
6668 
6669   // if (rb_state == ReadBarrier::GrayState())
6670   //   ref = ReadBarrier::Mark(ref);
6671   // At this point, just do the "if" and make sure that flags are preserved until the branch.
6672   __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
6673   if (needs_null_check) {
6674     MaybeRecordImplicitNullCheck(instruction);
6675   }
6676 
6677   // Load fence to prevent load-load reordering.
6678   // Note that this is a no-op, thanks to the x86-64 memory model.
6679   GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
6680 
6681   // The actual reference load.
6682   // /* HeapReference<Object> */ ref = *src
6683   __ movl(ref_reg, src);  // Flags are unaffected.
6684 
6685   // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch.
6686   // Slow path marking the object `ref` when it is gray.
6687   SlowPathCode* slow_path;
6688   if (always_update_field) {
6689     DCHECK(temp1 != nullptr);
6690     DCHECK(temp2 != nullptr);
6691     slow_path = new (GetScopedAllocator()) ReadBarrierMarkAndUpdateFieldSlowPathX86_64(
6692         instruction, ref, obj, src, /* unpoison_ref_before_marking */ true, *temp1, *temp2);
6693   } else {
6694     slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathX86_64(
6695         instruction, ref, /* unpoison_ref_before_marking */ true);
6696   }
6697   AddSlowPath(slow_path);
6698 
6699   // We have done the "if" of the gray bit check above, now branch based on the flags.
6700   __ j(kNotZero, slow_path->GetEntryLabel());
6701 
6702   // Object* ref = ref_addr->AsMirrorPtr()
6703   __ MaybeUnpoisonHeapReference(ref_reg);
6704 
6705   __ Bind(slow_path->GetExitLabel());
6706 }
6707 
6708 void CodeGeneratorX86_64::GenerateReadBarrierSlow(HInstruction* instruction,
6709                                                   Location out,
6710                                                   Location ref,
6711                                                   Location obj,
6712                                                   uint32_t offset,
6713                                                   Location index) {
6714   DCHECK(kEmitCompilerReadBarrier);
6715 
6716   // Insert a slow path based read barrier *after* the reference load.
6717   //
6718   // If heap poisoning is enabled, the unpoisoning of the loaded
6719   // reference will be carried out by the runtime within the slow
6720   // path.
6721   //
6722   // Note that `ref` currently does not get unpoisoned (when heap
6723   // poisoning is enabled), which is alright as the `ref` argument is
6724   // not used by the artReadBarrierSlow entry point.
6725   //
6726   // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
6727   SlowPathCode* slow_path = new (GetScopedAllocator())
6728       ReadBarrierForHeapReferenceSlowPathX86_64(instruction, out, ref, obj, offset, index);
6729   AddSlowPath(slow_path);
6730 
6731   __ jmp(slow_path->GetEntryLabel());
6732   __ Bind(slow_path->GetExitLabel());
6733 }
6734 
6735 void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
6736                                                        Location out,
6737                                                        Location ref,
6738                                                        Location obj,
6739                                                        uint32_t offset,
6740                                                        Location index) {
6741   if (kEmitCompilerReadBarrier) {
6742     // Baker's read barriers shall be handled by the fast path
6743     // (CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier).
6744     DCHECK(!kUseBakerReadBarrier);
6745     // If heap poisoning is enabled, unpoisoning will be taken care of
6746     // by the runtime within the slow path.
6747     GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
6748   } else if (kPoisonHeapReferences) {
6749     __ UnpoisonHeapReference(out.AsRegister<CpuRegister>());
6750   }
6751 }
6752 
6753 void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
6754                                                          Location out,
6755                                                          Location root) {
6756   DCHECK(kEmitCompilerReadBarrier);
6757 
6758   // Insert a slow path based read barrier *after* the GC root load.
6759   //
6760   // Note that GC roots are not affected by heap poisoning, so we do
6761   // not need to do anything special for this here.
6762   SlowPathCode* slow_path =
6763       new (GetScopedAllocator()) ReadBarrierForRootSlowPathX86_64(instruction, out, root);
6764   AddSlowPath(slow_path);
6765 
6766   __ jmp(slow_path->GetEntryLabel());
6767   __ Bind(slow_path->GetExitLabel());
6768 }
6769 
6770 void LocationsBuilderX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
6771   // Nothing to do, this should be removed during prepare for register allocator.
6772   LOG(FATAL) << "Unreachable";
6773 }
6774 
6775 void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
6776   // Nothing to do, this should be removed during prepare for register allocator.
6777   LOG(FATAL) << "Unreachable";
6778 }
6779 
6780 // Simple implementation of packed switch - generate cascaded compare/jumps.
6781 void LocationsBuilderX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
6782   LocationSummary* locations =
6783       new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
6784   locations->SetInAt(0, Location::RequiresRegister());
6785   locations->AddTemp(Location::RequiresRegister());
6786   locations->AddTemp(Location::RequiresRegister());
6787 }
6788 
6789 void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
6790   int32_t lower_bound = switch_instr->GetStartValue();
6791   uint32_t num_entries = switch_instr->GetNumEntries();
6792   LocationSummary* locations = switch_instr->GetLocations();
6793   CpuRegister value_reg_in = locations->InAt(0).AsRegister<CpuRegister>();
6794   CpuRegister temp_reg = locations->GetTemp(0).AsRegister<CpuRegister>();
6795   CpuRegister base_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
6796   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
6797 
6798   // Should we generate smaller inline compare/jumps?
6799   if (num_entries <= kPackedSwitchJumpTableThreshold) {
6800     // Figure out the correct compare values and jump conditions.
6801     // Handle the first compare/branch as a special case because it might
6802     // jump to the default case.
6803     DCHECK_GT(num_entries, 2u);
6804     Condition first_condition;
6805     uint32_t index;
6806     const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
6807     if (lower_bound != 0) {
6808       first_condition = kLess;
6809       __ cmpl(value_reg_in, Immediate(lower_bound));
6810       __ j(first_condition, codegen_->GetLabelOf(default_block));
6811       __ j(kEqual, codegen_->GetLabelOf(successors[0]));
6812 
6813       index = 1;
6814     } else {
6815       // Handle all the compare/jumps below.
6816       first_condition = kBelow;
6817       index = 0;
6818     }
6819 
6820     // Handle the rest of the compare/jumps.
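    // Each iteration covers two consecutive case values: the "less than" branch can
    // only fire for case_value[index] (smaller values were already dispatched above),
    // and the equality branch handles case_value[index + 1].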
6821     for (; index + 1 < num_entries; index += 2) {
6822       int32_t compare_to_value = lower_bound + index + 1;
6823       __ cmpl(value_reg_in, Immediate(compare_to_value));
6824       // Jump to successors[index] if value < case_value[index + 1].
6825       __ j(first_condition, codegen_->GetLabelOf(successors[index]));
6826       // Jump to successors[index + 1] if value == case_value[index + 1].
6827       __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
6828     }
6829 
6830     if (index != num_entries) {
6831       // There is an odd number of entries. Handle the last one.
6832       DCHECK_EQ(index + 1, num_entries);
6833       __ cmpl(value_reg_in, Immediate(static_cast<int32_t>(lower_bound + index)));
6834       __ j(kEqual, codegen_->GetLabelOf(successors[index]));
6835     }
6836 
6837     // And the default for any other value.
6838     if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
6839       __ jmp(codegen_->GetLabelOf(default_block));
6840     }
6841     return;
6842   }
6843 
6844   // Remove the bias, if needed.
6845   Register value_reg_out = value_reg_in.AsRegister();
6846   if (lower_bound != 0) {
6847     __ leal(temp_reg, Address(value_reg_in, -lower_bound));
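    // LEA does the subtraction into temp_reg without clobbering value_reg_in.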
6848     value_reg_out = temp_reg.AsRegister();
6849   }
6850   CpuRegister value_reg(value_reg_out);
6851 
6852   // Is the value in range?
6853   __ cmpl(value_reg, Immediate(num_entries - 1));
6854   __ j(kAbove, codegen_->GetLabelOf(default_block));
6855 
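  // Illustrative shape of the emitted dispatch (register names are placeholders):
  //   leaq   jump_table(%rip), base
  //   movsxd (base, value, 4), temp   // Signed 32-bit offset from the table base.
  //   addq   base, temp
  //   jmp    *temp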
6856   // We are in the range of the table.
6857   // Load the address of the jump table in the constant area.
6858   __ leaq(base_reg, codegen_->LiteralCaseTable(switch_instr));
6859 
6860   // Load the (signed) offset from the jump table.
6861   __ movsxd(temp_reg, Address(base_reg, value_reg, TIMES_4, 0));
6862 
6863   // Add the offset to the address of the table base.
6864   __ addq(temp_reg, base_reg);
6865 
6866   // And jump.
6867   __ jmp(temp_reg);
6868 }
6869 
6870 void LocationsBuilderX86_64::VisitIntermediateAddress(HIntermediateAddress* instruction
6871                                                       ATTRIBUTE_UNUSED) {
6872   LOG(FATAL) << "Unreachable";
6873 }
6874 
6875 void InstructionCodeGeneratorX86_64::VisitIntermediateAddress(HIntermediateAddress* instruction
6876                                                               ATTRIBUTE_UNUSED) {
6877   LOG(FATAL) << "Unreachable";
6878 }
6879 
6880 void CodeGeneratorX86_64::Load32BitValue(CpuRegister dest, int32_t value) {
6881   if (value == 0) {
6882     __ xorl(dest, dest);
6883   } else {
6884     __ movl(dest, Immediate(value));
6885   }
6886 }
6887 
6888 void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) {
6889   if (value == 0) {
6890     // Clears upper bits too.
6891     __ xorl(dest, dest);
6892   } else if (IsUint<32>(value)) {
6893     // We can use a 32-bit move, as it zero-extends and is shorter.
6894     __ movl(dest, Immediate(static_cast<int32_t>(value)));
6895   } else {
6896     __ movq(dest, Immediate(value));
6897   }
6898 }
6899 
6900 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, int32_t value) {
6901   if (value == 0) {
6902     __ xorps(dest, dest);
6903   } else {
6904     __ movss(dest, LiteralInt32Address(value));
6905   }
6906 }
6907 
6908 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, int64_t value) {
6909   if (value == 0) {
6910     __ xorpd(dest, dest);
6911   } else {
6912     __ movsd(dest, LiteralInt64Address(value));
6913   }
6914 }
6915 
6916 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, float value) {
6917   Load32BitValue(dest, bit_cast<int32_t, float>(value));
6918 }
6919 
6920 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, double value) {
6921   Load64BitValue(dest, bit_cast<int64_t, double>(value));
6922 }
6923 
6924 void CodeGeneratorX86_64::Compare32BitValue(CpuRegister dest, int32_t value) {
6925   if (value == 0) {
6926     __ testl(dest, dest);
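    // test reg,reg is shorter than cmp $0,reg and sets the zero and sign flags the
    // same way for a comparison against zero.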
6927   } else {
6928     __ cmpl(dest, Immediate(value));
6929   }
6930 }
6931 
6932 void CodeGeneratorX86_64::Compare64BitValue(CpuRegister dest, int64_t value) {
6933   if (IsInt<32>(value)) {
6934     if (value == 0) {
6935       __ testq(dest, dest);
6936     } else {
6937       __ cmpq(dest, Immediate(static_cast<int32_t>(value)));
6938     }
6939   } else {
6940     // Value won't fit in a 32-bit immediate.
6941     __ cmpq(dest, LiteralInt64Address(value));
6942   }
6943 }
6944 
6945 void CodeGeneratorX86_64::GenerateIntCompare(Location lhs, Location rhs) {
6946   CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>();
6947   GenerateIntCompare(lhs_reg, rhs);
6948 }
6949 
6950 void CodeGeneratorX86_64::GenerateIntCompare(CpuRegister lhs, Location rhs) {
6951   if (rhs.IsConstant()) {
6952     int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
6953     Compare32BitValue(lhs, value);
6954   } else if (rhs.IsStackSlot()) {
6955     __ cmpl(lhs, Address(CpuRegister(RSP), rhs.GetStackIndex()));
6956   } else {
6957     __ cmpl(lhs, rhs.AsRegister<CpuRegister>());
6958   }
6959 }
6960 
6961 void CodeGeneratorX86_64::GenerateLongCompare(Location lhs, Location rhs) {
6962   CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>();
6963   if (rhs.IsConstant()) {
6964     int64_t value = rhs.GetConstant()->AsLongConstant()->GetValue();
6965     Compare64BitValue(lhs_reg, value);
6966   } else if (rhs.IsDoubleStackSlot()) {
6967     __ cmpq(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
6968   } else {
6969     __ cmpq(lhs_reg, rhs.AsRegister<CpuRegister>());
6970   }
6971 }
6972 
6973 Address CodeGeneratorX86_64::ArrayAddress(CpuRegister obj,
6974                                           Location index,
6975                                           ScaleFactor scale,
6976                                           uint32_t data_offset) {
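  // A constant index is folded into the displacement; otherwise an x86-64
  // base + index * scale + displacement addressing mode is used.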
6977   return index.IsConstant() ?
6978       Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) :
6979       Address(obj, index.AsRegister<CpuRegister>(), scale, data_offset);
6980 }
6981 
6982 void CodeGeneratorX86_64::Store64BitValueToStack(Location dest, int64_t value) {
6983   DCHECK(dest.IsDoubleStackSlot());
6984   if (IsInt<32>(value)) {
6985     // Can move directly as an int32 constant.
6986     __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()),
6987             Immediate(static_cast<int32_t>(value)));
6988   } else {
6989     Load64BitValue(CpuRegister(TMP), value);
6990     __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()), CpuRegister(TMP));
6991   }
6992 }
6993 
6994 /**
6995  * Class to handle late fixup of offsets into the constant area.
6996  */
6997 class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
6998  public:
6999   RIPFixup(CodeGeneratorX86_64& codegen, size_t offset)
7000       : codegen_(&codegen), offset_into_constant_area_(offset) {}
7001 
7002  protected:
7003   void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }
7004 
7005   CodeGeneratorX86_64* codegen_;
7006 
7007  private:
7008   void Process(const MemoryRegion& region, int pos) OVERRIDE {
7009     // Patch the correct offset for the instruction.  We use the address of the
7010     // 'next' instruction, which is 'pos' (patch the 4 bytes before).
7011     int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
7012     int32_t relative_position = constant_offset - pos;
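    // For example (purely illustrative numbers): if the constant area starts at code
    // offset 0x400, this literal sits 8 bytes into it, and the next instruction starts
    // at pos == 0x120, the stored displacement is 0x400 + 8 - 0x120 = 0x2E8.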
7013 
7014     // Patch in the right value.
7015     region.StoreUnaligned<int32_t>(pos - 4, relative_position);
7016   }
7017 
7018   // Location in constant area that the fixup refers to.
7019   size_t offset_into_constant_area_;
7020 };
7021 
7022 /**
7023  * Class to handle late fixup of offsets to a jump table that will be created in the
7024  * constant area.
7025  */
7026 class JumpTableRIPFixup : public RIPFixup {
7027  public:
7028   JumpTableRIPFixup(CodeGeneratorX86_64& codegen, HPackedSwitch* switch_instr)
7029       : RIPFixup(codegen, -1), switch_instr_(switch_instr) {}
7030 
7031   void CreateJumpTable() {
7032     X86_64Assembler* assembler = codegen_->GetAssembler();
7033 
7034     // Ensure that the reference to the jump table has the correct offset.
7035     const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
7036     SetOffset(offset_in_constant_table);
7037 
7038     // Compute the offset from the start of the function to this jump table.
7039     const int32_t current_table_offset = assembler->CodeSize() + offset_in_constant_table;
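    // The constant area (and thus the jump table) is emitted right after the method's
    // code (see CodeGeneratorX86_64::Finalize), so its position is the current code
    // size plus the table's offset within the constant area.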
7040 
7041     // Populate the jump table with the correct values for the jump table.
7042     int32_t num_entries = switch_instr_->GetNumEntries();
7043     HBasicBlock* block = switch_instr_->GetBlock();
7044     const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
7045     // The value that we want is the target offset - the position of the table.
7046     for (int32_t i = 0; i < num_entries; i++) {
7047       HBasicBlock* b = successors[i];
7048       Label* l = codegen_->GetLabelOf(b);
7049       DCHECK(l->IsBound());
7050       int32_t offset_to_block = l->Position() - current_table_offset;
7051       assembler->AppendInt32(offset_to_block);
7052     }
7053   }
7054 
7055  private:
7056   const HPackedSwitch* switch_instr_;
7057 };
7058 
7059 void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) {
7060   // Generate the constant area if needed.
7061   X86_64Assembler* assembler = GetAssembler();
7062   if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
7063     // Align to a 4-byte boundary to reduce cache misses, as the data is 4- and 8-byte values.
7064     assembler->Align(4, 0);
7065     constant_area_start_ = assembler->CodeSize();
7066 
7067     // Populate any jump tables.
7068     for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) {
7069       jump_table->CreateJumpTable();
7070     }
7071 
7072     // And now add the constant area to the generated code.
7073     assembler->AddConstantArea();
7074   }
7075 
7076   // And finish up.
7077   CodeGenerator::Finalize(allocator);
7078 }
7079 
7080 Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) {
7081   AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddDouble(v));
7082   return Address::RIP(fixup);
7083 }
7084 
7085 Address CodeGeneratorX86_64::LiteralFloatAddress(float v) {
7086   AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddFloat(v));
7087   return Address::RIP(fixup);
7088 }
7089 
7090 Address CodeGeneratorX86_64::LiteralInt32Address(int32_t v) {
7091   AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddInt32(v));
7092   return Address::RIP(fixup);
7093 }
7094 
7095 Address CodeGeneratorX86_64::LiteralInt64Address(int64_t v) {
7096   AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddInt64(v));
7097   return Address::RIP(fixup);
7098 }
7099 
7100 // TODO: trg as memory.
7101 void CodeGeneratorX86_64::MoveFromReturnRegister(Location trg, DataType::Type type) {
7102   if (!trg.IsValid()) {
7103     DCHECK_EQ(type, DataType::Type::kVoid);
7104     return;
7105   }
7106 
7107   DCHECK_NE(type, DataType::Type::kVoid);
7108 
7109   Location return_loc = InvokeDexCallingConventionVisitorX86_64().GetReturnLocation(type);
7110   if (trg.Equals(return_loc)) {
7111     return;
7112   }
7113 
7114   // Let the parallel move resolver take care of all of this.
7115   HParallelMove parallel_move(GetGraph()->GetAllocator());
7116   parallel_move.AddMove(return_loc, trg, type, nullptr);
7117   GetMoveResolver()->EmitNativeCode(&parallel_move);
7118 }
7119 
7120 Address CodeGeneratorX86_64::LiteralCaseTable(HPackedSwitch* switch_instr) {
7121   // Create a fixup to be used to create and address the jump table.
7122   JumpTableRIPFixup* table_fixup =
7123       new (GetGraph()->GetAllocator()) JumpTableRIPFixup(*this, switch_instr);
7124 
7125   // We have to populate the jump tables.
7126   fixups_to_jump_tables_.push_back(table_fixup);
7127   return Address::RIP(table_fixup);
7128 }
7129 
7130 void CodeGeneratorX86_64::MoveInt64ToAddress(const Address& addr_low,
7131                                              const Address& addr_high,
7132                                              int64_t v,
7133                                              HInstruction* instruction) {
7134   if (IsInt<32>(v)) {
7135     int32_t v_32 = v;
7136     __ movq(addr_low, Immediate(v_32));
7137     MaybeRecordImplicitNullCheck(instruction);
7138   } else {
7139     // Didn't fit in a 32-bit immediate.  Do it in two 32-bit pieces.
7140     int32_t low_v = Low32Bits(v);
7141     int32_t high_v = High32Bits(v);
7142     __ movl(addr_low, Immediate(low_v));
7143     MaybeRecordImplicitNullCheck(instruction);
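    // The implicit null check, if any, is attached to the first store; the second
    // store only executes if the first one did not fault.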
7144     __ movl(addr_high, Immediate(high_v));
7145   }
7146 }
7147 
7148 void CodeGeneratorX86_64::PatchJitRootUse(uint8_t* code,
7149                                           const uint8_t* roots_data,
7150                                           const PatchInfo<Label>& info,
7151                                           uint64_t index_in_table) const {
7152   uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
7153   uintptr_t address =
7154       reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
7155   typedef __attribute__((__aligned__(1))) uint32_t unaligned_uint32_t;
7156   reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
7157      dchecked_integral_cast<uint32_t>(address);
7158 }
7159 
7160 void CodeGeneratorX86_64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
7161   for (const PatchInfo<Label>& info : jit_string_patches_) {
7162     StringReference string_reference(info.target_dex_file, dex::StringIndex(info.offset_or_index));
7163     uint64_t index_in_table = GetJitStringRootIndex(string_reference);
7164     PatchJitRootUse(code, roots_data, info, index_in_table);
7165   }
7166 
7167   for (const PatchInfo<Label>& info : jit_class_patches_) {
7168     TypeReference type_reference(info.target_dex_file, dex::TypeIndex(info.offset_or_index));
7169     uint64_t index_in_table = GetJitClassRootIndex(type_reference);
7170     PatchJitRootUse(code, roots_data, info, index_in_table);
7171   }
7172 }
7173 
7174 #undef __
7175 
7176 }  // namespace x86_64
7177 }  // namespace art
7178