1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "code_generator_x86_64.h"
18 
19 #include "arch/x86_64/jni_frame_x86_64.h"
20 #include "art_method-inl.h"
21 #include "class_table.h"
22 #include "code_generator_utils.h"
23 #include "compiled_method.h"
24 #include "entrypoints/quick/quick_entrypoints.h"
25 #include "gc/accounting/card_table.h"
26 #include "gc/space/image_space.h"
27 #include "heap_poisoning.h"
28 #include "interpreter/mterp/nterp.h"
29 #include "intrinsics.h"
30 #include "intrinsics_x86_64.h"
31 #include "jit/profiling_info.h"
32 #include "linker/linker_patch.h"
33 #include "lock_word.h"
34 #include "mirror/array-inl.h"
35 #include "mirror/class-inl.h"
36 #include "mirror/object_reference.h"
37 #include "scoped_thread_state_change-inl.h"
38 #include "thread.h"
39 #include "utils/assembler.h"
40 #include "utils/stack_checks.h"
41 #include "utils/x86_64/assembler_x86_64.h"
42 #include "utils/x86_64/constants_x86_64.h"
43 #include "utils/x86_64/managed_register_x86_64.h"
44 
45 namespace art {
46 
47 template<class MirrorType>
48 class GcRoot;
49 
50 namespace x86_64 {
51 
52 static constexpr int kCurrentMethodStackOffset = 0;
53 static constexpr Register kMethodRegisterArgument = RDI;
54 // The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump
55 // table version generates 7 instructions and num_entries literals. The compare/jump sequence
56 // generates less code/data with a small num_entries.
57 static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;
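// For instance (a rough sketch based on the estimate above, not a measured bound): at
// num_entries == 5 the compare/jump sequence costs roughly 1.5 * 5 ~= 8 instructions, while a
// jump table would still pay its fixed ~7 instructions plus 5 table literals, so small
// switches presumably stay on the compare/jump path.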
58 
59 static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 };
60 static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 };
61 
62 static constexpr int kC2ConditionMask = 0x400;
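// 0x400 is bit 10 of the x87 FPU status word, i.e. the C2 condition flag; it is presumably
// tested when looping on FPREM-style instructions until the partial remainder is complete.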
63 
64 static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
65   // Custom calling convention: RAX serves as both input and output.
66   RegisterSet caller_saves = RegisterSet::Empty();
67   caller_saves.Add(Location::RegisterLocation(RAX));
68   return caller_saves;
69 }
70 
71 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
72 #define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())->  // NOLINT
73 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize, x).Int32Value()
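// For reference: a slow-path statement such as `__ jmp(GetExitLabel());` expands to
// `down_cast<X86_64Assembler*>(codegen->GetAssembler())->jmp(GetExitLabel());`, and
// QUICK_ENTRY_POINT(x) yields the Thread-relative 32-bit offset of the quick entrypoint `x`
// computed for the 64-bit pointer size.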
74 
75 class NullCheckSlowPathX86_64 : public SlowPathCode {
76  public:
77   explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : SlowPathCode(instruction) {}
78 
79   void EmitNativeCode(CodeGenerator* codegen) override {
80     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
81     __ Bind(GetEntryLabel());
82     if (instruction_->CanThrowIntoCatchBlock()) {
83       // Live registers will be restored in the catch block if caught.
84       SaveLiveRegisters(codegen, instruction_->GetLocations());
85     }
86     x86_64_codegen->InvokeRuntime(kQuickThrowNullPointer,
87                                   instruction_,
88                                   instruction_->GetDexPc(),
89                                   this);
90     CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
91   }
92 
93   bool IsFatal() const override { return true; }
94 
95   const char* GetDescription() const override { return "NullCheckSlowPathX86_64"; }
96 
97  private:
98   DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86_64);
99 };
100 
101 class DivZeroCheckSlowPathX86_64 : public SlowPathCode {
102  public:
103   explicit DivZeroCheckSlowPathX86_64(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}
104 
105   void EmitNativeCode(CodeGenerator* codegen) override {
106     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
107     __ Bind(GetEntryLabel());
108     x86_64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
109     CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
110   }
111 
112   bool IsFatal() const override { return true; }
113 
114   const char* GetDescription() const override { return "DivZeroCheckSlowPathX86_64"; }
115 
116  private:
117   DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86_64);
118 };
119 
120 class DivRemMinusOneSlowPathX86_64 : public SlowPathCode {
121  public:
122   DivRemMinusOneSlowPathX86_64(HInstruction* at, Register reg, DataType::Type type, bool is_div)
123       : SlowPathCode(at), cpu_reg_(CpuRegister(reg)), type_(type), is_div_(is_div) {}
124 
125   void EmitNativeCode(CodeGenerator* codegen) override {
126     __ Bind(GetEntryLabel());
127     if (type_ == DataType::Type::kInt32) {
128       if (is_div_) {
129         __ negl(cpu_reg_);
130       } else {
131         __ xorl(cpu_reg_, cpu_reg_);
132       }
133 
134     } else {
135       DCHECK_EQ(DataType::Type::kInt64, type_);
136       if (is_div_) {
137         __ negq(cpu_reg_);
138       } else {
139         __ xorl(cpu_reg_, cpu_reg_);
140       }
141     }
142     __ jmp(GetExitLabel());
143   }
144 
145   const char* GetDescription() const override { return "DivRemMinusOneSlowPathX86_64"; }
146 
147  private:
148   const CpuRegister cpu_reg_;
149   const DataType::Type type_;
150   const bool is_div_;
151   DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86_64);
152 };
153 
154 class SuspendCheckSlowPathX86_64 : public SlowPathCode {
155  public:
156   SuspendCheckSlowPathX86_64(HSuspendCheck* instruction, HBasicBlock* successor)
157       : SlowPathCode(instruction), successor_(successor) {}
158 
159   void EmitNativeCode(CodeGenerator* codegen) override {
160     LocationSummary* locations = instruction_->GetLocations();
161     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
162     __ Bind(GetEntryLabel());
163     SaveLiveRegisters(codegen, locations);  // Only saves full width XMM for SIMD.
164     x86_64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
165     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
166     RestoreLiveRegisters(codegen, locations);  // Only restores full width XMM for SIMD.
167     if (successor_ == nullptr) {
168       __ jmp(GetReturnLabel());
169     } else {
170       __ jmp(x86_64_codegen->GetLabelOf(successor_));
171     }
172   }
173 
174   Label* GetReturnLabel() {
175     DCHECK(successor_ == nullptr);
176     return &return_label_;
177   }
178 
179   HBasicBlock* GetSuccessor() const {
180     return successor_;
181   }
182 
183   const char* GetDescription() const override { return "SuspendCheckSlowPathX86_64"; }
184 
185  private:
186   HBasicBlock* const successor_;
187   Label return_label_;
188 
189   DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86_64);
190 };
191 
192 class BoundsCheckSlowPathX86_64 : public SlowPathCode {
193  public:
194   explicit BoundsCheckSlowPathX86_64(HBoundsCheck* instruction)
195     : SlowPathCode(instruction) {}
196 
197   void EmitNativeCode(CodeGenerator* codegen) override {
198     LocationSummary* locations = instruction_->GetLocations();
199     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
200     __ Bind(GetEntryLabel());
201     if (instruction_->CanThrowIntoCatchBlock()) {
202       // Live registers will be restored in the catch block if caught.
203       SaveLiveRegisters(codegen, instruction_->GetLocations());
204     }
205     // Are we using an array length from memory?
206     HInstruction* array_length = instruction_->InputAt(1);
207     Location length_loc = locations->InAt(1);
208     InvokeRuntimeCallingConvention calling_convention;
209     if (array_length->IsArrayLength() && array_length->IsEmittedAtUseSite()) {
210       // Load the array length into our temporary.
211       HArrayLength* length = array_length->AsArrayLength();
212       uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(length);
213       Location array_loc = array_length->GetLocations()->InAt(0);
214       Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
215       length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
216       // Check for conflicts with index.
217       if (length_loc.Equals(locations->InAt(0))) {
218         // We know we aren't using parameter 2.
219         length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2));
220       }
221       __ movl(length_loc.AsRegister<CpuRegister>(), array_len);
222       if (mirror::kUseStringCompression && length->IsStringLength()) {
223         __ shrl(length_loc.AsRegister<CpuRegister>(), Immediate(1));
224       }
225     }
226 
227     // We're moving two locations to locations that could overlap, so we need a parallel
228     // move resolver.
229     codegen->EmitParallelMoves(
230         locations->InAt(0),
231         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
232         DataType::Type::kInt32,
233         length_loc,
234         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
235         DataType::Type::kInt32);
236     QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
237         ? kQuickThrowStringBounds
238         : kQuickThrowArrayBounds;
239     x86_64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
240     CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
241     CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
242   }
243 
244   bool IsFatal() const override { return true; }
245 
246   const char* GetDescription() const override { return "BoundsCheckSlowPathX86_64"; }
247 
248  private:
249   DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86_64);
250 };
251 
252 class LoadClassSlowPathX86_64 : public SlowPathCode {
253  public:
254   LoadClassSlowPathX86_64(HLoadClass* cls, HInstruction* at)
255       : SlowPathCode(at), cls_(cls) {
256     DCHECK(at->IsLoadClass() || at->IsClinitCheck());
257     DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
258   }
259 
260   void EmitNativeCode(CodeGenerator* codegen) override {
261     LocationSummary* locations = instruction_->GetLocations();
262     Location out = locations->Out();
263     const uint32_t dex_pc = instruction_->GetDexPc();
264     bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
265     bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();
266 
267     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
268     __ Bind(GetEntryLabel());
269     SaveLiveRegisters(codegen, locations);
270 
271     // Custom calling convention: RAX serves as both input and output.
272     if (must_resolve_type) {
273       DCHECK(IsSameDexFile(cls_->GetDexFile(), x86_64_codegen->GetGraph()->GetDexFile()));
274       dex::TypeIndex type_index = cls_->GetTypeIndex();
275       __ movl(CpuRegister(RAX), Immediate(type_index.index_));
276       if (cls_->NeedsAccessCheck()) {
277         CheckEntrypointTypes<kQuickResolveTypeAndVerifyAccess, void*, uint32_t>();
278         x86_64_codegen->InvokeRuntime(kQuickResolveTypeAndVerifyAccess, instruction_, dex_pc, this);
279       } else {
280         CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
281         x86_64_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
282       }
283       // If we also must_do_clinit, the resolved type is now in the correct register.
284     } else {
285       DCHECK(must_do_clinit);
286       Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
287       x86_64_codegen->Move(Location::RegisterLocation(RAX), source);
288     }
289     if (must_do_clinit) {
290       x86_64_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
291       CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
292     }
293 
294     // Move the class to the desired location.
295     if (out.IsValid()) {
296       DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
297       x86_64_codegen->Move(out, Location::RegisterLocation(RAX));
298     }
299 
300     RestoreLiveRegisters(codegen, locations);
301     __ jmp(GetExitLabel());
302   }
303 
304   const char* GetDescription() const override { return "LoadClassSlowPathX86_64"; }
305 
306  private:
307   // The class this slow path will load.
308   HLoadClass* const cls_;
309 
310   DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86_64);
311 };
312 
313 class LoadStringSlowPathX86_64 : public SlowPathCode {
314  public:
315   explicit LoadStringSlowPathX86_64(HLoadString* instruction) : SlowPathCode(instruction) {}
316 
317   void EmitNativeCode(CodeGenerator* codegen) override {
318     LocationSummary* locations = instruction_->GetLocations();
319     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
320 
321     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
322     __ Bind(GetEntryLabel());
323     SaveLiveRegisters(codegen, locations);
324 
325     const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
326     // Custom calling convention: RAX serves as both input and output.
327     __ movl(CpuRegister(RAX), Immediate(string_index.index_));
328     x86_64_codegen->InvokeRuntime(kQuickResolveString,
329                                   instruction_,
330                                   instruction_->GetDexPc(),
331                                   this);
332     CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
333     x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
334     RestoreLiveRegisters(codegen, locations);
335 
336     __ jmp(GetExitLabel());
337   }
338 
339   const char* GetDescription() const override { return "LoadStringSlowPathX86_64"; }
340 
341  private:
342   DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86_64);
343 };
344 
345 class TypeCheckSlowPathX86_64 : public SlowPathCode {
346  public:
347   TypeCheckSlowPathX86_64(HInstruction* instruction, bool is_fatal)
348       : SlowPathCode(instruction), is_fatal_(is_fatal) {}
349 
350   void EmitNativeCode(CodeGenerator* codegen) override {
351     LocationSummary* locations = instruction_->GetLocations();
352     uint32_t dex_pc = instruction_->GetDexPc();
353     DCHECK(instruction_->IsCheckCast()
354            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
355 
356     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
357     __ Bind(GetEntryLabel());
358 
359     if (kPoisonHeapReferences &&
360         instruction_->IsCheckCast() &&
361         instruction_->AsCheckCast()->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) {
362       // First, unpoison the `cls` reference that was poisoned for direct memory comparison.
363       __ UnpoisonHeapReference(locations->InAt(1).AsRegister<CpuRegister>());
364     }
365 
366     if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
367       SaveLiveRegisters(codegen, locations);
368     }
369 
370     // We're moving two locations to locations that could overlap, so we need a parallel
371     // move resolver.
372     InvokeRuntimeCallingConvention calling_convention;
373     codegen->EmitParallelMoves(locations->InAt(0),
374                                Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
375                                DataType::Type::kReference,
376                                locations->InAt(1),
377                                Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
378                                DataType::Type::kReference);
379     if (instruction_->IsInstanceOf()) {
380       x86_64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
381       CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
382     } else {
383       DCHECK(instruction_->IsCheckCast());
384       x86_64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
385       CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
386     }
387 
388     if (!is_fatal_) {
389       if (instruction_->IsInstanceOf()) {
390         x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
391       }
392 
393       RestoreLiveRegisters(codegen, locations);
394       __ jmp(GetExitLabel());
395     }
396   }
397 
398   const char* GetDescription() const override { return "TypeCheckSlowPathX86_64"; }
399 
400   bool IsFatal() const override { return is_fatal_; }
401 
402  private:
403   const bool is_fatal_;
404 
405   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86_64);
406 };
407 
408 class DeoptimizationSlowPathX86_64 : public SlowPathCode {
409  public:
410   explicit DeoptimizationSlowPathX86_64(HDeoptimize* instruction)
411       : SlowPathCode(instruction) {}
412 
413   void EmitNativeCode(CodeGenerator* codegen) override {
414     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
415     __ Bind(GetEntryLabel());
416     LocationSummary* locations = instruction_->GetLocations();
417     SaveLiveRegisters(codegen, locations);
418     InvokeRuntimeCallingConvention calling_convention;
419     x86_64_codegen->Load32BitValue(
420         CpuRegister(calling_convention.GetRegisterAt(0)),
421         static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
422     x86_64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
423     CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
424   }
425 
426   const char* GetDescription() const override { return "DeoptimizationSlowPathX86_64"; }
427 
428  private:
429   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64);
430 };
431 
432 class ArraySetSlowPathX86_64 : public SlowPathCode {
433  public:
434   explicit ArraySetSlowPathX86_64(HInstruction* instruction) : SlowPathCode(instruction) {}
435 
436   void EmitNativeCode(CodeGenerator* codegen) override {
437     LocationSummary* locations = instruction_->GetLocations();
438     __ Bind(GetEntryLabel());
439     SaveLiveRegisters(codegen, locations);
440 
441     InvokeRuntimeCallingConvention calling_convention;
442     HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
443     parallel_move.AddMove(
444         locations->InAt(0),
445         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
446         DataType::Type::kReference,
447         nullptr);
448     parallel_move.AddMove(
449         locations->InAt(1),
450         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
451         DataType::Type::kInt32,
452         nullptr);
453     parallel_move.AddMove(
454         locations->InAt(2),
455         Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
456         DataType::Type::kReference,
457         nullptr);
458     codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
459 
460     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
461     x86_64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
462     CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
463     RestoreLiveRegisters(codegen, locations);
464     __ jmp(GetExitLabel());
465   }
466 
467   const char* GetDescription() const override { return "ArraySetSlowPathX86_64"; }
468 
469  private:
470   DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64);
471 };
472 
473 // Slow path marking an object reference `ref` during a read
474 // barrier. The field `obj.field` in the object `obj` holding this
475 // reference does not get updated by this slow path after marking (see
476 // ReadBarrierMarkAndUpdateFieldSlowPathX86_64 below for that).
477 //
478 // This means that after the execution of this slow path, `ref` will
479 // always be up-to-date, but `obj.field` may not; i.e., after the
480 // flip, `ref` will be a to-space reference, but `obj.field` will
481 // probably still be a from-space reference (unless it gets updated by
482 // another thread, or if another thread installed another object
483 // reference (different from `ref`) in `obj.field`).
484 class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
485  public:
486   ReadBarrierMarkSlowPathX86_64(HInstruction* instruction,
487                                 Location ref,
488                                 bool unpoison_ref_before_marking)
489       : SlowPathCode(instruction),
490         ref_(ref),
491         unpoison_ref_before_marking_(unpoison_ref_before_marking) {
492     DCHECK(kEmitCompilerReadBarrier);
493   }
494 
495   const char* GetDescription() const override { return "ReadBarrierMarkSlowPathX86_64"; }
496 
497   void EmitNativeCode(CodeGenerator* codegen) override {
498     LocationSummary* locations = instruction_->GetLocations();
499     CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
500     Register ref_reg = ref_cpu_reg.AsRegister();
501     DCHECK(locations->CanCall());
502     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
503     DCHECK(instruction_->IsInstanceFieldGet() ||
504            instruction_->IsPredicatedInstanceFieldGet() ||
505            instruction_->IsStaticFieldGet() ||
506            instruction_->IsArrayGet() ||
507            instruction_->IsArraySet() ||
508            instruction_->IsLoadClass() ||
509            instruction_->IsLoadString() ||
510            instruction_->IsInstanceOf() ||
511            instruction_->IsCheckCast() ||
512            (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
513            (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
514         << "Unexpected instruction in read barrier marking slow path: "
515         << instruction_->DebugName();
516 
517     __ Bind(GetEntryLabel());
518     if (unpoison_ref_before_marking_) {
519       // Object* ref = ref_addr->AsMirrorPtr()
520       __ MaybeUnpoisonHeapReference(ref_cpu_reg);
521     }
522     // No need to save live registers; it's taken care of by the
523     // entrypoint. Also, there is no need to update the stack mask,
524     // as this runtime call will not trigger a garbage collection.
525     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
526     DCHECK_NE(ref_reg, RSP);
527     DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
528     // "Compact" slow path, saving two moves.
529     //
530     // Instead of using the standard runtime calling convention (input
531     // in RDI and output in RAX):
532     //
533     //   RDI <- ref
534     //   RAX <- ReadBarrierMark(RDI)
535     //   ref <- RAX
536     //
537     // we just use rX (the register containing `ref`) as input and output
538     // of a dedicated entrypoint:
539     //
540     //   rX <- ReadBarrierMarkRegX(rX)
541     //
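    // For example, if `ref` lives in RSI (register 6), the call below is expected to go
    // through the per-register entrypoint slot for register 6, and the marked reference
    // comes back in RSI with no extra register moves.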
542     int32_t entry_point_offset =
543         Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
544     // This runtime call does not require a stack map.
545     x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
546     __ jmp(GetExitLabel());
547   }
548 
549  private:
550   // The location (register) of the marked object reference.
551   const Location ref_;
552   // Should the reference in `ref_` be unpoisoned prior to marking it?
553   const bool unpoison_ref_before_marking_;
554 
555   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64);
556 };
557 
558 // Slow path marking an object reference `ref` during a read barrier,
559 // and if needed, atomically updating the field `obj.field` in the
560 // object `obj` holding this reference after marking (contrary to
561 // ReadBarrierMarkSlowPathX86_64 above, which never tries to update
562 // `obj.field`).
563 //
564 // This means that after the execution of this slow path, both `ref`
565 // and `obj.field` will be up-to-date; i.e., after the flip, both will
566 // hold the same to-space reference (unless another thread installed
567 // another object reference (different from `ref`) in `obj.field`).
568 class ReadBarrierMarkAndUpdateFieldSlowPathX86_64 : public SlowPathCode {
569  public:
570   ReadBarrierMarkAndUpdateFieldSlowPathX86_64(HInstruction* instruction,
571                                               Location ref,
572                                               CpuRegister obj,
573                                               const Address& field_addr,
574                                               bool unpoison_ref_before_marking,
575                                               CpuRegister temp1,
576                                               CpuRegister temp2)
577       : SlowPathCode(instruction),
578         ref_(ref),
579         obj_(obj),
580         field_addr_(field_addr),
581         unpoison_ref_before_marking_(unpoison_ref_before_marking),
582         temp1_(temp1),
583         temp2_(temp2) {
584     DCHECK(kEmitCompilerReadBarrier);
585   }
586 
587   const char* GetDescription() const override {
588     return "ReadBarrierMarkAndUpdateFieldSlowPathX86_64";
589   }
590 
591   void EmitNativeCode(CodeGenerator* codegen) override {
592     LocationSummary* locations = instruction_->GetLocations();
593     CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
594     Register ref_reg = ref_cpu_reg.AsRegister();
595     DCHECK(locations->CanCall());
596     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
597     // This slow path is only used by the UnsafeCASObject intrinsic.
598     DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
599         << "Unexpected instruction in read barrier marking and field updating slow path: "
600         << instruction_->DebugName();
601     DCHECK(instruction_->GetLocations()->Intrinsified());
602     DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
603 
604     __ Bind(GetEntryLabel());
605     if (unpoison_ref_before_marking_) {
606       // Object* ref = ref_addr->AsMirrorPtr()
607       __ MaybeUnpoisonHeapReference(ref_cpu_reg);
608     }
609 
610     // Save the old (unpoisoned) reference.
611     __ movl(temp1_, ref_cpu_reg);
612 
613     // No need to save live registers; it's taken care of by the
614     // entrypoint. Also, there is no need to update the stack mask,
615     // as this runtime call will not trigger a garbage collection.
616     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
617     DCHECK_NE(ref_reg, RSP);
618     DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
619     // "Compact" slow path, saving two moves.
620     //
621     // Instead of using the standard runtime calling convention (input
622     // in RDI and output in RAX):
623     //
624     //   RDI <- ref
625     //   RAX <- ReadBarrierMark(RDI)
626     //   ref <- RAX
627     //
628     // we just use rX (the register containing `ref`) as input and output
629     // of a dedicated entrypoint:
630     //
631     //   rX <- ReadBarrierMarkRegX(rX)
632     //
633     int32_t entry_point_offset =
634         Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
635     // This runtime call does not require a stack map.
636     x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
637 
638     // If the new reference is different from the old reference,
639     // update the field in the holder (`*field_addr`).
640     //
641     // Note that this field could also hold a different object, if
642     // another thread had concurrently changed it. In that case, the
643     // LOCK CMPXCHGL instruction in the compare-and-set (CAS)
644     // operation below would abort the CAS, leaving the field as-is.
645     NearLabel done;
646     __ cmpl(temp1_, ref_cpu_reg);
647     __ j(kEqual, &done);
648 
649     // Update the holder's field atomically.  This may fail if the
650     // mutator updates it before us, but that is OK.  This is achieved
651     // using a strong compare-and-set (CAS) operation with relaxed
652     // memory synchronization ordering, where the expected value is
653     // the old reference and the desired value is the new reference.
654     // This operation is implemented with a 32-bit LOCK CMPXCHG
655     // instruction, which requires the expected value (the old
656     // reference) to be in EAX.  Save RAX beforehand, and move the
657     // expected value (stored in `temp1_`) into EAX.
658     __ movq(temp2_, CpuRegister(RAX));
659     __ movl(CpuRegister(RAX), temp1_);
660 
661     // Convenience aliases.
662     CpuRegister base = obj_;
663     CpuRegister expected = CpuRegister(RAX);
664     CpuRegister value = ref_cpu_reg;
665 
666     bool base_equals_value = (base.AsRegister() == value.AsRegister());
667     Register value_reg = ref_reg;
668     if (kPoisonHeapReferences) {
669       if (base_equals_value) {
670         // If `base` and `value` are the same register location, move
671         // `value_reg` to a temporary register.  This way, poisoning
672         // `value_reg` won't invalidate `base`.
673         value_reg = temp1_.AsRegister();
674         __ movl(CpuRegister(value_reg), base);
675       }
676 
677       // Check that the register allocator did not assign the location
678       // of `expected` (RAX) to `value` nor to `base`, so that heap
679       // poisoning (when enabled) works as intended below.
680       // - If `value` were equal to `expected`, both references would
681       //   be poisoned twice, meaning they would not be poisoned at
682       //   all, as heap poisoning uses address negation.
683       // - If `base` were equal to `expected`, poisoning `expected`
684       //   would invalidate `base`.
685       DCHECK_NE(value_reg, expected.AsRegister());
686       DCHECK_NE(base.AsRegister(), expected.AsRegister());
687 
688       __ PoisonHeapReference(expected);
689       __ PoisonHeapReference(CpuRegister(value_reg));
690     }
691 
692     __ LockCmpxchgl(field_addr_, CpuRegister(value_reg));
693 
694     // If heap poisoning is enabled, we need to unpoison the values
695     // that were poisoned earlier.
696     if (kPoisonHeapReferences) {
697       if (base_equals_value) {
698         // `value_reg` has been moved to a temporary register, no need
699         // to unpoison it.
700       } else {
701         __ UnpoisonHeapReference(CpuRegister(value_reg));
702       }
703       // No need to unpoison `expected` (RAX), as it will be overwritten below.
704     }
705 
706     // Restore RAX.
707     __ movq(CpuRegister(RAX), temp2_);
708 
709     __ Bind(&done);
710     __ jmp(GetExitLabel());
711   }
712 
713  private:
714   // The location (register) of the marked object reference.
715   const Location ref_;
716   // The register containing the object holding the marked object reference field.
717   const CpuRegister obj_;
718   // The address of the marked reference field.  The base of this address must be `obj_`.
719   const Address field_addr_;
720 
721   // Should the reference in `ref_` be unpoisoned prior to marking it?
722   const bool unpoison_ref_before_marking_;
723 
724   const CpuRegister temp1_;
725   const CpuRegister temp2_;
726 
727   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86_64);
728 };
729 
730 // Slow path generating a read barrier for a heap reference.
731 class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
732  public:
733   ReadBarrierForHeapReferenceSlowPathX86_64(HInstruction* instruction,
734                                             Location out,
735                                             Location ref,
736                                             Location obj,
737                                             uint32_t offset,
738                                             Location index)
739       : SlowPathCode(instruction),
740         out_(out),
741         ref_(ref),
742         obj_(obj),
743         offset_(offset),
744         index_(index) {
745     DCHECK(kEmitCompilerReadBarrier);
746     // If `obj` is equal to `out` or `ref`, it means the initial
747     // object has been overwritten by (or after) the heap object
748     // reference load to be instrumented, e.g.:
749     //
750     //   __ movl(out, Address(out, offset));
751     //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
752     //
753     // In that case, we have lost the information about the original
754     // object, and the emitted read barrier cannot work properly.
755     DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
756     DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
757   }
758 
759   void EmitNativeCode(CodeGenerator* codegen) override {
760     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
761     LocationSummary* locations = instruction_->GetLocations();
762     CpuRegister reg_out = out_.AsRegister<CpuRegister>();
763     DCHECK(locations->CanCall());
764     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_;
765     DCHECK(instruction_->IsInstanceFieldGet() ||
766            instruction_->IsPredicatedInstanceFieldGet() ||
767            instruction_->IsStaticFieldGet() ||
768            instruction_->IsArrayGet() ||
769            instruction_->IsInstanceOf() ||
770            instruction_->IsCheckCast() ||
771            (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
772         << "Unexpected instruction in read barrier for heap reference slow path: "
773         << instruction_->DebugName();
774 
775     __ Bind(GetEntryLabel());
776     SaveLiveRegisters(codegen, locations);
777 
778     // We may have to change the index's value, but as `index_` is a
779     // constant member (like other "inputs" of this slow path),
780     // introduce a copy of it, `index`.
781     Location index = index_;
782     if (index_.IsValid()) {
783       // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
784       if (instruction_->IsArrayGet()) {
785         // Compute real offset and store it in index_.
786         Register index_reg = index_.AsRegister<CpuRegister>().AsRegister();
787         DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
788         if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
789           // We are about to change the value of `index_reg` (see the
790           // calls to art::x86_64::X86_64Assembler::shll and
791           // art::x86_64::X86_64Assembler::AddImmediate below), but it
792           // has not been saved by the previous call to
793           // art::SlowPathCode::SaveLiveRegisters, as it is a
794           // callee-save register --
795           // art::SlowPathCode::SaveLiveRegisters does not consider
796           // callee-save registers, as it has been designed with the
797           // assumption that callee-save registers are supposed to be
798           // handled by the called function.  So, as a callee-save
799           // register, `index_reg` _would_ eventually be saved onto
800           // the stack, but it would be too late: we would have
801           // changed its value earlier.  Therefore, we manually save
802           // it here into another freely available register,
803           // `free_reg`, chosen of course among the caller-save
804           // registers (as a callee-save `free_reg` register would
805           // exhibit the same problem).
806           //
807           // Note we could have requested a temporary register from
808           // the register allocator instead; but we prefer not to, as
809           // this is a slow path, and we know we can find a
810           // caller-save register that is available.
811           Register free_reg = FindAvailableCallerSaveRegister(codegen).AsRegister();
812           __ movl(CpuRegister(free_reg), CpuRegister(index_reg));
813           index_reg = free_reg;
814           index = Location::RegisterLocation(index_reg);
815         } else {
816           // The initial register stored in `index_` has already been
817           // saved in the call to art::SlowPathCode::SaveLiveRegisters
818           // (as it is not a callee-save register), so we can freely
819           // use it.
820         }
821         // Shifting the index value contained in `index_reg` by the
822         // scale factor (2) cannot overflow in practice, as the
823         // runtime is unable to allocate object arrays with a size
824         // larger than 2^26 - 1 (that is, 2^28 - 4 bytes).
825         __ shll(CpuRegister(index_reg), Immediate(TIMES_4));
826         static_assert(
827             sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
828             "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
829         __ AddImmediate(CpuRegister(index_reg), Immediate(offset_));
830       } else {
831         // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
832         // intrinsics, `index_` is not shifted by a scale factor of 2
833         // (as in the case of ArrayGet), as it is actually an offset
834         // to an object field within an object.
835         DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
836         DCHECK(instruction_->GetLocations()->Intrinsified());
837         DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
838                (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
839             << instruction_->AsInvoke()->GetIntrinsic();
840         DCHECK_EQ(offset_, 0U);
841         DCHECK(index_.IsRegister());
842       }
843     }
844 
845     // We're moving two or three locations to locations that could
846     // overlap, so we need a parallel move resolver.
847     InvokeRuntimeCallingConvention calling_convention;
848     HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
849     parallel_move.AddMove(ref_,
850                           Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
851                           DataType::Type::kReference,
852                           nullptr);
853     parallel_move.AddMove(obj_,
854                           Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
855                           DataType::Type::kReference,
856                           nullptr);
857     if (index.IsValid()) {
858       parallel_move.AddMove(index,
859                             Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
860                             DataType::Type::kInt32,
861                             nullptr);
862       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
863     } else {
864       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
865       __ movl(CpuRegister(calling_convention.GetRegisterAt(2)), Immediate(offset_));
866     }
867     x86_64_codegen->InvokeRuntime(kQuickReadBarrierSlow,
868                                   instruction_,
869                                   instruction_->GetDexPc(),
870                                   this);
871     CheckEntrypointTypes<
872         kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
873     x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
874 
875     RestoreLiveRegisters(codegen, locations);
876     __ jmp(GetExitLabel());
877   }
878 
879   const char* GetDescription() const override {
880     return "ReadBarrierForHeapReferenceSlowPathX86_64";
881   }
882 
883  private:
884   CpuRegister FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
885     size_t ref = static_cast<int>(ref_.AsRegister<CpuRegister>().AsRegister());
886     size_t obj = static_cast<int>(obj_.AsRegister<CpuRegister>().AsRegister());
887     for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
888       if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
889         return static_cast<CpuRegister>(i);
890       }
891     }
892     // We shall never fail to find a free caller-save register, as
893     // there are more than two core caller-save registers on x86-64
894     // (meaning it is possible to find one which is different from
895     // `ref` and `obj`).
896     DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
897     LOG(FATAL) << "Could not find a free caller-save register";
898     UNREACHABLE();
899   }
900 
901   const Location out_;
902   const Location ref_;
903   const Location obj_;
904   const uint32_t offset_;
905   // An additional location containing an index to an array.
906   // Only used for HArrayGet and the UnsafeGetObject &
907   // UnsafeGetObjectVolatile intrinsics.
908   const Location index_;
909 
910   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86_64);
911 };
912 
913 // Slow path generating a read barrier for a GC root.
914 class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode {
915  public:
916   ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root)
917       : SlowPathCode(instruction), out_(out), root_(root) {
918     DCHECK(kEmitCompilerReadBarrier);
919   }
920 
921   void EmitNativeCode(CodeGenerator* codegen) override {
922     LocationSummary* locations = instruction_->GetLocations();
923     DCHECK(locations->CanCall());
924     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
925     DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
926         << "Unexpected instruction in read barrier for GC root slow path: "
927         << instruction_->DebugName();
928 
929     __ Bind(GetEntryLabel());
930     SaveLiveRegisters(codegen, locations);
931 
932     InvokeRuntimeCallingConvention calling_convention;
933     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
934     x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
935     x86_64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
936                                   instruction_,
937                                   instruction_->GetDexPc(),
938                                   this);
939     CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
940     x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
941 
942     RestoreLiveRegisters(codegen, locations);
943     __ jmp(GetExitLabel());
944   }
945 
946   const char* GetDescription() const override { return "ReadBarrierForRootSlowPathX86_64"; }
947 
948  private:
949   const Location out_;
950   const Location root_;
951 
952   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86_64);
953 };
954 
955 #undef __
956 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
957 #define __ down_cast<X86_64Assembler*>(GetAssembler())->  // NOLINT
958 
959 inline Condition X86_64IntegerCondition(IfCondition cond) {
960   switch (cond) {
961     case kCondEQ: return kEqual;
962     case kCondNE: return kNotEqual;
963     case kCondLT: return kLess;
964     case kCondLE: return kLessEqual;
965     case kCondGT: return kGreater;
966     case kCondGE: return kGreaterEqual;
967     case kCondB:  return kBelow;
968     case kCondBE: return kBelowEqual;
969     case kCondA:  return kAbove;
970     case kCondAE: return kAboveEqual;
971   }
972   LOG(FATAL) << "Unreachable";
973   UNREACHABLE();
974 }
975 
976 // Maps FP condition to x86_64 name.
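// Floating-point compares (ucomiss/ucomisd) set ZF/CF the same way an unsigned integer
// compare does, which is why the ordered conditions below map to the below/above variants.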
977 inline Condition X86_64FPCondition(IfCondition cond) {
978   switch (cond) {
979     case kCondEQ: return kEqual;
980     case kCondNE: return kNotEqual;
981     case kCondLT: return kBelow;
982     case kCondLE: return kBelowEqual;
983     case kCondGT: return kAbove;
984     case kCondGE: return kAboveEqual;
985     default:      break;  // should not happen
986   }
987   LOG(FATAL) << "Unreachable";
988   UNREACHABLE();
989 }
990 
991 void CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(LocationSummary* locations) {
992   // We have to ensure that the native code we call directly (such as @CriticalNative
993   // or some intrinsic helpers, say Math.sin()) doesn't clobber the XMM registers
994   // which are non-volatile for ART, but volatile for Native calls.  This will ensure
995   // that they are saved in the prologue and properly restored.
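  // (Assumption: `non_volatile_xmm_regs`, declared in jni_frame_x86_64.h, covers the same
  // XMM12-XMM15 set as kFpuCalleeSaves above.)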
996   for (FloatRegister fp_reg : non_volatile_xmm_regs) {
997     locations->AddTemp(Location::FpuRegisterLocation(fp_reg));
998   }
999 }
1000 
1001 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStaticOrDirectDispatch(
1002       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
1003       ArtMethod* method ATTRIBUTE_UNUSED) {
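  // Every dispatch kind requested by the instruction builder can be generated directly on
  // x86-64, so the desired dispatch info is returned unchanged.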
1004   return desired_dispatch_info;
1005 }
1006 
1007 void CodeGeneratorX86_64::LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke) {
1008   switch (load_kind) {
1009     case MethodLoadKind::kBootImageLinkTimePcRelative:
1010       DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
1011       __ leal(temp.AsRegister<CpuRegister>(),
1012               Address::Absolute(kPlaceholder32BitOffset, /* no_rip= */ false));
1013       RecordBootImageMethodPatch(invoke);
1014       break;
1015     case MethodLoadKind::kBootImageRelRo: {
1016       // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load.
1017       __ movl(temp.AsRegister<CpuRegister>(),
1018               Address::Absolute(kPlaceholder32BitOffset, /* no_rip= */ false));
1019       RecordBootImageRelRoPatch(GetBootImageOffset(invoke));
1020       break;
1021     }
1022     case MethodLoadKind::kBssEntry: {
1023       __ movq(temp.AsRegister<CpuRegister>(),
1024               Address::Absolute(kPlaceholder32BitOffset, /* no_rip= */ false));
1025       RecordMethodBssEntryPatch(invoke);
1026       // No need for memory fence, thanks to the x86-64 memory model.
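      // (x86-64 does not reorder a load with earlier loads, so once the .bss slot is observed
      // as non-null, the method data it points to is visible as well.)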
1027       break;
1028     }
1029     case MethodLoadKind::kJitDirectAddress: {
1030       Load64BitValue(temp.AsRegister<CpuRegister>(),
1031                      reinterpret_cast<int64_t>(invoke->GetResolvedMethod()));
1032       break;
1033     }
1034     case MethodLoadKind::kRuntimeCall: {
1035       // Test situation, don't do anything.
1036       break;
1037     }
1038     default: {
1039       LOG(FATAL) << "Load kind should have already been handled " << load_kind;
1040       UNREACHABLE();
1041     }
1042   }
1043 }
1044 
1045 void CodeGeneratorX86_64::GenerateStaticOrDirectCall(
1046     HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
1047   // All registers are assumed to be correctly set up.
1048 
1049   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
1050   switch (invoke->GetMethodLoadKind()) {
1051     case MethodLoadKind::kStringInit: {
1052       // temp = thread->string_init_entrypoint
1053       uint32_t offset =
1054           GetThreadOffset<kX86_64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
1055       __ gs()->movq(temp.AsRegister<CpuRegister>(), Address::Absolute(offset, /* no_rip= */ true));
1056       break;
1057     }
1058     case MethodLoadKind::kRecursive: {
1059       callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex());
1060       break;
1061     }
1062     case MethodLoadKind::kRuntimeCall: {
1063       GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
1064       return;  // No code pointer retrieval; the runtime performs the call directly.
1065     }
1066     case MethodLoadKind::kBootImageLinkTimePcRelative:
1067       // For kCallCriticalNative we skip loading the method and do the call directly.
1068       if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
1069         break;
1070       }
1071       FALLTHROUGH_INTENDED;
1072     default: {
1073       LoadMethod(invoke->GetMethodLoadKind(), temp, invoke);
1074       break;
1075     }
1076   }
1077 
1078   switch (invoke->GetCodePtrLocation()) {
1079     case CodePtrLocation::kCallSelf:
1080       __ call(&frame_entry_label_);
1081       RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
1082       break;
1083     case CodePtrLocation::kCallCriticalNative: {
1084       size_t out_frame_size =
1085           PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorX86_64,
1086                                     kNativeStackAlignment,
1087                                     GetCriticalNativeDirectCallFrameSize>(invoke);
1088       if (invoke->GetMethodLoadKind() == MethodLoadKind::kBootImageLinkTimePcRelative) {
1089         DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
1090         __ call(Address::Absolute(kPlaceholder32BitOffset, /* no_rip= */ false));
1091         RecordBootImageJniEntrypointPatch(invoke);
1092       } else {
1093         // (callee_method + offset_of_jni_entry_point)()
1094         __ call(Address(callee_method.AsRegister<CpuRegister>(),
1095                          ArtMethod::EntryPointFromJniOffset(kX86_64PointerSize).SizeValue()));
1096       }
1097       RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
1098       // Zero-/sign-extend the result when needed due to native and managed ABI mismatch.
1099       switch (invoke->GetType()) {
1100         case DataType::Type::kBool:
1101           __ movzxb(CpuRegister(RAX), CpuRegister(RAX));
1102           break;
1103         case DataType::Type::kInt8:
1104           __ movsxb(CpuRegister(RAX), CpuRegister(RAX));
1105           break;
1106         case DataType::Type::kUint16:
1107           __ movzxw(CpuRegister(RAX), CpuRegister(RAX));
1108           break;
1109         case DataType::Type::kInt16:
1110           __ movsxw(CpuRegister(RAX), CpuRegister(RAX));
1111           break;
1112         case DataType::Type::kInt32:
1113         case DataType::Type::kInt64:
1114         case DataType::Type::kFloat32:
1115         case DataType::Type::kFloat64:
1116         case DataType::Type::kVoid:
1117           break;
1118         default:
1119           DCHECK(false) << invoke->GetType();
1120           break;
1121       }
1122       if (out_frame_size != 0u) {
1123         DecreaseFrame(out_frame_size);
1124       }
1125       break;
1126     }
1127     case CodePtrLocation::kCallArtMethod:
1128       // (callee_method + offset_of_quick_compiled_code)()
1129       __ call(Address(callee_method.AsRegister<CpuRegister>(),
1130                       ArtMethod::EntryPointFromQuickCompiledCodeOffset(
1131                           kX86_64PointerSize).SizeValue()));
1132       RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
1133       break;
1134   }
1135 
1136   DCHECK(!IsLeafMethod());
1137 }
1138 
1139 void CodeGeneratorX86_64::GenerateVirtualCall(
1140     HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
1141   CpuRegister temp = temp_in.AsRegister<CpuRegister>();
1142   size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
1143       invoke->GetVTableIndex(), kX86_64PointerSize).SizeValue();
1144 
1145   // Use the calling convention instead of the location of the receiver, as
1146   // intrinsics may have put the receiver in a different register. In the intrinsics
1147   // slow path, the arguments have been moved to the right place, so here we are
1148   // guaranteed that the receiver is the first register of the calling convention.
1149   InvokeDexCallingConvention calling_convention;
1150   Register receiver = calling_convention.GetRegisterAt(0);
1151 
1152   size_t class_offset = mirror::Object::ClassOffset().SizeValue();
1153   // /* HeapReference<Class> */ temp = receiver->klass_
1154   __ movl(temp, Address(CpuRegister(receiver), class_offset));
1155   MaybeRecordImplicitNullCheck(invoke);
1156   // Instead of simply (possibly) unpoisoning `temp` here, we should
1157   // emit a read barrier for the previous class reference load.
1158   // However this is not required in practice, as this is an
1159   // intermediate/temporary reference and because the current
1160   // concurrent copying collector keeps the from-space memory
1161   // intact/accessible until the end of the marking phase (the
1162     // concurrent copying collector may not do so in the future).
1163   __ MaybeUnpoisonHeapReference(temp);
1164 
1165   MaybeGenerateInlineCacheCheck(invoke, temp);
1166 
1167   // temp = temp->GetMethodAt(method_offset);
1168   __ movq(temp, Address(temp, method_offset));
1169   // call temp->GetEntryPoint();
1170   __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(
1171       kX86_64PointerSize).SizeValue()));
1172   RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
1173 }
1174 
1175 void CodeGeneratorX86_64::RecordBootImageIntrinsicPatch(uint32_t intrinsic_data) {
1176   boot_image_other_patches_.emplace_back(/* target_dex_file= */ nullptr, intrinsic_data);
1177   __ Bind(&boot_image_other_patches_.back().label);
1178 }
1179 
1180 void CodeGeneratorX86_64::RecordBootImageRelRoPatch(uint32_t boot_image_offset) {
1181   boot_image_other_patches_.emplace_back(/* target_dex_file= */ nullptr, boot_image_offset);
1182   __ Bind(&boot_image_other_patches_.back().label);
1183 }
1184 
1185 void CodeGeneratorX86_64::RecordBootImageMethodPatch(HInvoke* invoke) {
1186   boot_image_method_patches_.emplace_back(invoke->GetResolvedMethodReference().dex_file,
1187                                           invoke->GetResolvedMethodReference().index);
1188   __ Bind(&boot_image_method_patches_.back().label);
1189 }
1190 
1191 void CodeGeneratorX86_64::RecordMethodBssEntryPatch(HInvoke* invoke) {
1192   DCHECK(IsSameDexFile(GetGraph()->GetDexFile(), *invoke->GetMethodReference().dex_file));
1193   method_bss_entry_patches_.emplace_back(invoke->GetMethodReference().dex_file,
1194                                          invoke->GetMethodReference().index);
1195   __ Bind(&method_bss_entry_patches_.back().label);
1196 }
1197 
1198 void CodeGeneratorX86_64::RecordBootImageTypePatch(HLoadClass* load_class) {
1199   boot_image_type_patches_.emplace_back(
1200       &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
1201   __ Bind(&boot_image_type_patches_.back().label);
1202 }
1203 
1204 Label* CodeGeneratorX86_64::NewTypeBssEntryPatch(HLoadClass* load_class) {
1205   ArenaDeque<PatchInfo<Label>>* patches = nullptr;
1206   switch (load_class->GetLoadKind()) {
1207     case HLoadClass::LoadKind::kBssEntry:
1208       patches = &type_bss_entry_patches_;
1209       break;
1210     case HLoadClass::LoadKind::kBssEntryPublic:
1211       patches = &public_type_bss_entry_patches_;
1212       break;
1213     case HLoadClass::LoadKind::kBssEntryPackage:
1214       patches = &package_type_bss_entry_patches_;
1215       break;
1216     default:
1217       LOG(FATAL) << "Unexpected load kind: " << load_class->GetLoadKind();
1218       UNREACHABLE();
1219   }
1220   patches->emplace_back(&load_class->GetDexFile(), load_class->GetTypeIndex().index_);
1221   return &patches->back().label;
1222 }
1223 
1224 void CodeGeneratorX86_64::RecordBootImageStringPatch(HLoadString* load_string) {
1225   boot_image_string_patches_.emplace_back(
1226       &load_string->GetDexFile(), load_string->GetStringIndex().index_);
1227   __ Bind(&boot_image_string_patches_.back().label);
1228 }
1229 
1230 Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) {
1231   string_bss_entry_patches_.emplace_back(
1232       &load_string->GetDexFile(), load_string->GetStringIndex().index_);
1233   return &string_bss_entry_patches_.back().label;
1234 }
1235 
1236 void CodeGeneratorX86_64::RecordBootImageJniEntrypointPatch(HInvokeStaticOrDirect* invoke) {
1237   boot_image_jni_entrypoint_patches_.emplace_back(invoke->GetResolvedMethodReference().dex_file,
1238                                                   invoke->GetResolvedMethodReference().index);
1239   __ Bind(&boot_image_jni_entrypoint_patches_.back().label);
1240 }
1241 
1242 void CodeGeneratorX86_64::LoadBootImageAddress(CpuRegister reg, uint32_t boot_image_reference) {
1243   if (GetCompilerOptions().IsBootImage()) {
1244     __ leal(reg,
1245             Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
1246     RecordBootImageIntrinsicPatch(boot_image_reference);
1247   } else if (GetCompilerOptions().GetCompilePic()) {
1248     __ movl(reg,
1249             Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
1250     RecordBootImageRelRoPatch(boot_image_reference);
1251   } else {
1252     DCHECK(GetCompilerOptions().IsJitCompiler());
1253     gc::Heap* heap = Runtime::Current()->GetHeap();
1254     DCHECK(!heap->GetBootImageSpaces().empty());
1255     const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference;
1256     __ movl(reg, Immediate(dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(address))));
1257   }
1258 }
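// The three branches above correspond to three load shapes (a sketch; the operands shown are
// illustrative):
//
//   boot image compile (AOT):  leal reg, [rip + 0]   ; placeholder patched to the object's
//                                                    ; boot image address at link time
//   PIC app compile (AOT):     movl reg, [rip + 0]   ; placeholder patched to a boot image
//                                                    ; rel.ro entry (see DataBimgRelRoPatch below)
//   JIT:                       movl reg, imm32       ; boot image already mapped, absolute
//                                                    ; address embedded directly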
1259 
1260 void CodeGeneratorX86_64::LoadIntrinsicDeclaringClass(CpuRegister reg, HInvoke* invoke) {
1261   DCHECK_NE(invoke->GetIntrinsic(), Intrinsics::kNone);
1262   if (GetCompilerOptions().IsBootImage()) {
1263     // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
1264     __ leal(reg,
1265             Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
1266     MethodReference target_method = invoke->GetResolvedMethodReference();
1267     dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
1268     boot_image_type_patches_.emplace_back(target_method.dex_file, type_idx.index_);
1269     __ Bind(&boot_image_type_patches_.back().label);
1270   } else {
1271     uint32_t boot_image_offset = GetBootImageOffsetOfIntrinsicDeclaringClass(invoke);
1272     LoadBootImageAddress(reg, boot_image_offset);
1273   }
1274 }
1275 
1276 // The label points to the end of the "movl" (or another) instruction, but the literal offset
1277 // for a method patch needs to point to the embedded constant, which occupies its last 4 bytes.
1278 constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
1279 
1280 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
1281 inline void CodeGeneratorX86_64::EmitPcRelativeLinkerPatches(
1282     const ArenaDeque<PatchInfo<Label>>& infos,
1283     ArenaVector<linker::LinkerPatch>* linker_patches) {
1284   for (const PatchInfo<Label>& info : infos) {
1285     uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
1286     linker_patches->push_back(
1287         Factory(literal_offset, info.target_dex_file, info.label.Position(), info.offset_or_index));
1288   }
1289 }
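// Worked example of the offset arithmetic above, using a hypothetical position: if a patched
// RIP-relative instruction ends at assembler position 0x40, the label bound right after it
// reports Position() == 0x40, so the 4-byte immediate to rewrite starts at
// 0x40 - kLabelPositionToLiteralOffsetAdjustment == 0x3c; that value is the literal_offset
// passed to the linker patch factory, while Position() itself serves as the PC anchor.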
1290 
1291 template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
1292 linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
1293                                      const DexFile* target_dex_file,
1294                                      uint32_t pc_insn_offset,
1295                                      uint32_t boot_image_offset) {
1296   DCHECK(target_dex_file == nullptr);  // Unused for these patches, should be null.
1297   return Factory(literal_offset, pc_insn_offset, boot_image_offset);
1298 }
1299 
1300 void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
1301   DCHECK(linker_patches->empty());
1302   size_t size =
1303       boot_image_method_patches_.size() +
1304       method_bss_entry_patches_.size() +
1305       boot_image_type_patches_.size() +
1306       type_bss_entry_patches_.size() +
1307       public_type_bss_entry_patches_.size() +
1308       package_type_bss_entry_patches_.size() +
1309       boot_image_string_patches_.size() +
1310       string_bss_entry_patches_.size() +
1311       boot_image_jni_entrypoint_patches_.size() +
1312       boot_image_other_patches_.size();
1313   linker_patches->reserve(size);
1314   if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) {
1315     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
1316         boot_image_method_patches_, linker_patches);
1317     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
1318         boot_image_type_patches_, linker_patches);
1319     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
1320         boot_image_string_patches_, linker_patches);
1321   } else {
1322     DCHECK(boot_image_method_patches_.empty());
1323     DCHECK(boot_image_type_patches_.empty());
1324     DCHECK(boot_image_string_patches_.empty());
1325   }
1326   if (GetCompilerOptions().IsBootImage()) {
1327     EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
1328         boot_image_other_patches_, linker_patches);
1329   } else {
1330     EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>(
1331         boot_image_other_patches_, linker_patches);
1332   }
1333   EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
1334       method_bss_entry_patches_, linker_patches);
1335   EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
1336       type_bss_entry_patches_, linker_patches);
1337   EmitPcRelativeLinkerPatches<linker::LinkerPatch::PublicTypeBssEntryPatch>(
1338       public_type_bss_entry_patches_, linker_patches);
1339   EmitPcRelativeLinkerPatches<linker::LinkerPatch::PackageTypeBssEntryPatch>(
1340       package_type_bss_entry_patches_, linker_patches);
1341   EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
1342       string_bss_entry_patches_, linker_patches);
1343   EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeJniEntrypointPatch>(
1344       boot_image_jni_entrypoint_patches_, linker_patches);
1345   DCHECK_EQ(size, linker_patches->size());
1346 }
1347 
1348 void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const {
1349   stream << Register(reg);
1350 }
1351 
1352 void CodeGeneratorX86_64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
1353   stream << FloatRegister(reg);
1354 }
1355 
1356 const X86_64InstructionSetFeatures& CodeGeneratorX86_64::GetInstructionSetFeatures() const {
1357   return *GetCompilerOptions().GetInstructionSetFeatures()->AsX86_64InstructionSetFeatures();
1358 }
1359 
1360 size_t CodeGeneratorX86_64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
1361   __ movq(Address(CpuRegister(RSP), stack_index), CpuRegister(reg_id));
1362   return kX86_64WordSize;
1363 }
1364 
1365 size_t CodeGeneratorX86_64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
1366   __ movq(CpuRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1367   return kX86_64WordSize;
1368 }
1369 
1370 size_t CodeGeneratorX86_64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1371   if (GetGraph()->HasSIMD()) {
1372     __ movups(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
1373   } else {
1374     __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
1375   }
1376   return GetSlowPathFPWidth();
1377 }
1378 
1379 size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1380   if (GetGraph()->HasSIMD()) {
1381     __ movups(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1382   } else {
1383     __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1384   }
1385   return GetSlowPathFPWidth();
1386 }
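// Note: slow paths spill XMM registers at full 128-bit width (movups) only when the graph
// contains SIMD code; otherwise a 64-bit movsd is enough. GetSlowPathFPWidth() reports the
// matching slot size so the restore above always reads exactly what was saved.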
1387 
1388 void CodeGeneratorX86_64::InvokeRuntime(QuickEntrypointEnum entrypoint,
1389                                         HInstruction* instruction,
1390                                         uint32_t dex_pc,
1391                                         SlowPathCode* slow_path) {
1392   ValidateInvokeRuntime(entrypoint, instruction, slow_path);
1393   GenerateInvokeRuntime(GetThreadOffset<kX86_64PointerSize>(entrypoint).Int32Value());
1394   if (EntrypointRequiresStackMap(entrypoint)) {
1395     RecordPcInfo(instruction, dex_pc, slow_path);
1396   }
1397 }
1398 
1399 void CodeGeneratorX86_64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
1400                                                               HInstruction* instruction,
1401                                                               SlowPathCode* slow_path) {
1402   ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
1403   GenerateInvokeRuntime(entry_point_offset);
1404 }
1405 
1406 void CodeGeneratorX86_64::GenerateInvokeRuntime(int32_t entry_point_offset) {
1407   __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip= */ true));
1408 }
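// Quick entrypoints are fields of the current Thread, which x86-64 addresses through the %gs
// segment, so a runtime call is a single thread-local indirect call (a sketch):
//
//   gs: call [entry_point_offset]   ; the offset selects one of the Thread's entrypoint fields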
1409 
1410 static constexpr int kNumberOfCpuRegisterPairs = 0;
1411 // Use a fake return address register to mimic Quick.
1412 static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1);
1413 CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
1414                                          const CompilerOptions& compiler_options,
1415                                          OptimizingCompilerStats* stats)
1416     : CodeGenerator(graph,
1417                     kNumberOfCpuRegisters,
1418                     kNumberOfFloatRegisters,
1419                     kNumberOfCpuRegisterPairs,
1420                     ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
1421                                         arraysize(kCoreCalleeSaves))
1422                         | (1 << kFakeReturnRegister),
1423                     ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves),
1424                                         arraysize(kFpuCalleeSaves)),
1425                     compiler_options,
1426                     stats),
1427       block_labels_(nullptr),
1428       location_builder_(graph, this),
1429       instruction_visitor_(graph, this),
1430       move_resolver_(graph->GetAllocator(), this),
1431       assembler_(graph->GetAllocator()),
1432       constant_area_start_(0),
1433       boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1434       method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1435       boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1436       type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1437       public_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1438       package_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1439       boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1440       string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1441       boot_image_jni_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1442       boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1443       jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1444       jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1445       fixups_to_jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
1446   AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
1447 }
1448 
1449 InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph,
1450                                                                CodeGeneratorX86_64* codegen)
1451       : InstructionCodeGenerator(graph, codegen),
1452         assembler_(codegen->GetAssembler()),
1453         codegen_(codegen) {}
1454 
1455 void CodeGeneratorX86_64::SetupBlockedRegisters() const {
1456   // Stack register is always reserved.
1457   blocked_core_registers_[RSP] = true;
1458 
1459   // Block the register used as TMP.
1460   blocked_core_registers_[TMP] = true;
1461 }
1462 
1463 static dwarf::Reg DWARFReg(Register reg) {
1464   return dwarf::Reg::X86_64Core(static_cast<int>(reg));
1465 }
1466 
1467 static dwarf::Reg DWARFReg(FloatRegister reg) {
1468   return dwarf::Reg::X86_64Fp(static_cast<int>(reg));
1469 }
1470 
1471 void CodeGeneratorX86_64::MaybeIncrementHotness(bool is_frame_entry) {
1472   if (GetCompilerOptions().CountHotnessInCompiledCode()) {
1473     NearLabel overflow;
1474     Register method = kMethodRegisterArgument;
1475     if (!is_frame_entry) {
1476       CHECK(RequiresCurrentMethod());
1477       method = TMP;
1478       __ movq(CpuRegister(method), Address(CpuRegister(RSP), kCurrentMethodStackOffset));
1479     }
1480     __ cmpw(Address(CpuRegister(method), ArtMethod::HotnessCountOffset().Int32Value()),
1481             Immediate(ArtMethod::MaxCounter()));
1482     __ j(kEqual, &overflow);
1483     __ addw(Address(CpuRegister(method), ArtMethod::HotnessCountOffset().Int32Value()),
1484             Immediate(1));
1485     __ Bind(&overflow);
1486   }
1487 
1488   if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
1489     ScopedProfilingInfoUse spiu(
1490         Runtime::Current()->GetJit(), GetGraph()->GetArtMethod(), Thread::Current());
1491     ProfilingInfo* info = spiu.GetProfilingInfo();
1492     if (info != nullptr) {
1493       uint64_t address = reinterpret_cast64<uint64_t>(info);
1494       NearLabel done;
1495       __ movq(CpuRegister(TMP), Immediate(address));
1496       __ addw(Address(CpuRegister(TMP), ProfilingInfo::BaselineHotnessCountOffset().Int32Value()),
1497               Immediate(1));
1498       __ andw(Address(CpuRegister(TMP), ProfilingInfo::BaselineHotnessCountOffset().Int32Value()),
1499               Immediate(interpreter::kTieredHotnessMask));
1500       __ j(kNotZero, &done);
1501       if (HasEmptyFrame()) {
1502         CHECK(is_frame_entry);
1503         // Frame alignment, and the stub expects the method on the stack.
1504         __ pushq(CpuRegister(RDI));
1505         __ cfi().AdjustCFAOffset(kX86_64WordSize);
1506         __ cfi().RelOffset(DWARFReg(RDI), 0);
1507       } else if (!RequiresCurrentMethod()) {
1508         CHECK(is_frame_entry);
1509         __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset), CpuRegister(RDI));
1510       }
1511       GenerateInvokeRuntime(
1512           GetThreadOffset<kX86_64PointerSize>(kQuickCompileOptimized).Int32Value());
1513       if (HasEmptyFrame()) {
1514         __ popq(CpuRegister(RDI));
1515         __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize));
1516         __ cfi().Restore(DWARFReg(RDI));
1517       }
1518       __ Bind(&done);
1519     }
1520   }
1521 }
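// Sketch of the baseline-hotness path emitted above (labels illustrative; TMP usage as in the
// code):
//
//   movq TMP, imm64                           ; address of this method's ProfilingInfo
//   addw [TMP + baseline_hotness_offset], 1
//   andw [TMP + baseline_hotness_offset], kTieredHotnessMask
//   jnz  done                                 ; threshold not reached yet
//   call gs:[kQuickCompileOptimized]          ; request optimized (re)compilation
// done: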
1522 
1523 void CodeGeneratorX86_64::GenerateFrameEntry() {
1524   __ cfi().SetCurrentCFAOffset(kX86_64WordSize);  // return address
1525   __ Bind(&frame_entry_label_);
1526   bool skip_overflow_check = IsLeafMethod()
1527       && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
1528   DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
1529 
1530 
1531   if (!skip_overflow_check) {
1532     size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86_64);
1533     __ testq(CpuRegister(RAX), Address(CpuRegister(RSP), -static_cast<int32_t>(reserved_bytes)));
1534     RecordPcInfo(nullptr, 0);
1535   }
1536 
1537   if (!HasEmptyFrame()) {
1538     for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
1539       Register reg = kCoreCalleeSaves[i];
1540       if (allocated_registers_.ContainsCoreRegister(reg)) {
1541         __ pushq(CpuRegister(reg));
1542         __ cfi().AdjustCFAOffset(kX86_64WordSize);
1543         __ cfi().RelOffset(DWARFReg(reg), 0);
1544       }
1545     }
1546 
1547     int adjust = GetFrameSize() - GetCoreSpillSize();
1548     IncreaseFrame(adjust);
1549     uint32_t xmm_spill_location = GetFpuSpillStart();
1550     size_t xmm_spill_slot_size = GetCalleePreservedFPWidth();
1551 
1552     for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
1553       if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
1554         int offset = xmm_spill_location + (xmm_spill_slot_size * i);
1555         __ movsd(Address(CpuRegister(RSP), offset), XmmRegister(kFpuCalleeSaves[i]));
1556         __ cfi().RelOffset(DWARFReg(kFpuCalleeSaves[i]), offset);
1557       }
1558     }
1559 
1560     // Save the current method if we need it. Note that we do not
1561     // do this in HCurrentMethod, as the instruction might have been removed
1562     // in the SSA graph.
1563     if (RequiresCurrentMethod()) {
1564       CHECK(!HasEmptyFrame());
1565       __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset),
1566               CpuRegister(kMethodRegisterArgument));
1567     }
1568 
1569     if (GetGraph()->HasShouldDeoptimizeFlag()) {
1570       CHECK(!HasEmptyFrame());
1571       // Initialize should_deoptimize flag to 0.
1572       __ movl(Address(CpuRegister(RSP), GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
1573     }
1574   }
1575 
1576   MaybeIncrementHotness(/* is_frame_entry= */ true);
1577 }
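// A typical non-empty prologue produced above, assuming (purely for illustration) that RBX is
// the only allocated core callee-save and XMM12 the only allocated FP callee-save:
//
//   testq rax, [rsp - reserved_bytes]        ; implicit stack-overflow probe
//   pushq rbx                                ; spill core callee-saves (CFI updated)
//   subq  rsp, frame_size - core_spill_size  ; allocate the rest of the frame
//   movsd [rsp + xmm_spill_offset], xmm12    ; spill FP callee-saves
//   movq  [rsp + 0], rdi                     ; store ArtMethod* in the current-method slot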
1578 
1579 void CodeGeneratorX86_64::GenerateFrameExit() {
1580   __ cfi().RememberState();
1581   if (!HasEmptyFrame()) {
1582     uint32_t xmm_spill_location = GetFpuSpillStart();
1583     size_t xmm_spill_slot_size = GetCalleePreservedFPWidth();
1584     for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
1585       if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
1586         int offset = xmm_spill_location + (xmm_spill_slot_size * i);
1587         __ movsd(XmmRegister(kFpuCalleeSaves[i]), Address(CpuRegister(RSP), offset));
1588         __ cfi().Restore(DWARFReg(kFpuCalleeSaves[i]));
1589       }
1590     }
1591 
1592     int adjust = GetFrameSize() - GetCoreSpillSize();
1593     DecreaseFrame(adjust);
1594 
1595     for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
1596       Register reg = kCoreCalleeSaves[i];
1597       if (allocated_registers_.ContainsCoreRegister(reg)) {
1598         __ popq(CpuRegister(reg));
1599         __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize));
1600         __ cfi().Restore(DWARFReg(reg));
1601       }
1602     }
1603   }
1604   __ ret();
1605   __ cfi().RestoreState();
1606   __ cfi().DefCFAOffset(GetFrameSize());
1607 }
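// The epilogue mirrors the prologue in reverse: reload the spilled XMM callee-saves, undo the
// frame adjustment, pop the core callee-saves and return. The CFI state is remembered before
// the tear-down and restored after the ret, so code emitted later in the method is still
// described by the full-frame unwind information.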
1608 
1609 void CodeGeneratorX86_64::Bind(HBasicBlock* block) {
1610   __ Bind(GetLabelOf(block));
1611 }
1612 
1613 void CodeGeneratorX86_64::Move(Location destination, Location source) {
1614   if (source.Equals(destination)) {
1615     return;
1616   }
1617   if (destination.IsRegister()) {
1618     CpuRegister dest = destination.AsRegister<CpuRegister>();
1619     if (source.IsRegister()) {
1620       __ movq(dest, source.AsRegister<CpuRegister>());
1621     } else if (source.IsFpuRegister()) {
1622       __ movd(dest, source.AsFpuRegister<XmmRegister>());
1623     } else if (source.IsStackSlot()) {
1624       __ movl(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1625     } else if (source.IsConstant()) {
1626       HConstant* constant = source.GetConstant();
1627       if (constant->IsLongConstant()) {
1628         Load64BitValue(dest, constant->AsLongConstant()->GetValue());
1629       } else {
1630         Load32BitValue(dest, GetInt32ValueOf(constant));
1631       }
1632     } else {
1633       DCHECK(source.IsDoubleStackSlot());
1634       __ movq(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1635     }
1636   } else if (destination.IsFpuRegister()) {
1637     XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
1638     if (source.IsRegister()) {
1639       __ movd(dest, source.AsRegister<CpuRegister>());
1640     } else if (source.IsFpuRegister()) {
1641       __ movaps(dest, source.AsFpuRegister<XmmRegister>());
1642     } else if (source.IsConstant()) {
1643       HConstant* constant = source.GetConstant();
1644       int64_t value = CodeGenerator::GetInt64ValueOf(constant);
1645       if (constant->IsFloatConstant()) {
1646         Load32BitValue(dest, static_cast<int32_t>(value));
1647       } else {
1648         Load64BitValue(dest, value);
1649       }
1650     } else if (source.IsStackSlot()) {
1651       __ movss(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1652     } else {
1653       DCHECK(source.IsDoubleStackSlot());
1654       __ movsd(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1655     }
1656   } else if (destination.IsStackSlot()) {
1657     if (source.IsRegister()) {
1658       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
1659               source.AsRegister<CpuRegister>());
1660     } else if (source.IsFpuRegister()) {
1661       __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
1662                source.AsFpuRegister<XmmRegister>());
1663     } else if (source.IsConstant()) {
1664       HConstant* constant = source.GetConstant();
1665       int32_t value = GetInt32ValueOf(constant);
1666       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
1667     } else {
1668       DCHECK(source.IsStackSlot()) << source;
1669       __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
1670       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
1671     }
1672   } else {
1673     DCHECK(destination.IsDoubleStackSlot());
1674     if (source.IsRegister()) {
1675       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
1676               source.AsRegister<CpuRegister>());
1677     } else if (source.IsFpuRegister()) {
1678       __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
1679                source.AsFpuRegister<XmmRegister>());
1680     } else if (source.IsConstant()) {
1681       HConstant* constant = source.GetConstant();
1682       DCHECK(constant->IsLongConstant() || constant->IsDoubleConstant());
1683       int64_t value = GetInt64ValueOf(constant);
1684       Store64BitValueToStack(destination, value);
1685     } else {
1686       DCHECK(source.IsDoubleStackSlot());
1687       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
1688       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
1689     }
1690   }
1691 }
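// Examples of the lowering above (a sketch; stack offsets are illustrative):
//
//   register <- 32-bit stack slot : movl rax, [rsp + 16]
//   XMM      <- 64-bit stack slot : movsd xmm0, [rsp + 24]
//   stack    <- stack (32-bit)    : movl TMP, [rsp + src]; movl [rsp + dst], TMP
//
// Memory-to-memory moves always bounce through TMP since x86 has no mem-to-mem mov encoding.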
1692 
1693 void CodeGeneratorX86_64::MoveConstant(Location location, int32_t value) {
1694   DCHECK(location.IsRegister());
1695   Load64BitValue(location.AsRegister<CpuRegister>(), static_cast<int64_t>(value));
1696 }
1697 
1698 void CodeGeneratorX86_64::MoveLocation(
1699     Location dst, Location src, DataType::Type dst_type ATTRIBUTE_UNUSED) {
1700   Move(dst, src);
1701 }
1702 
1703 void CodeGeneratorX86_64::AddLocationAsTemp(Location location, LocationSummary* locations) {
1704   if (location.IsRegister()) {
1705     locations->AddTemp(location);
1706   } else {
1707     UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
1708   }
1709 }
1710 
1711 void InstructionCodeGeneratorX86_64::HandleGoto(HInstruction* got, HBasicBlock* successor) {
1712   if (successor->IsExitBlock()) {
1713     DCHECK(got->GetPrevious()->AlwaysThrows());
1714     return;  // no code needed
1715   }
1716 
1717   HBasicBlock* block = got->GetBlock();
1718   HInstruction* previous = got->GetPrevious();
1719 
1720   HLoopInformation* info = block->GetLoopInformation();
1721   if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
1722     codegen_->MaybeIncrementHotness(/* is_frame_entry= */ false);
1723     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
1724     return;
1725   }
1726 
1727   if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
1728     GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
1729   }
1730   if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
1731     __ jmp(codegen_->GetLabelOf(successor));
1732   }
1733 }
1734 
1735 void LocationsBuilderX86_64::VisitGoto(HGoto* got) {
1736   got->SetLocations(nullptr);
1737 }
1738 
1739 void InstructionCodeGeneratorX86_64::VisitGoto(HGoto* got) {
1740   HandleGoto(got, got->GetSuccessor());
1741 }
1742 
1743 void LocationsBuilderX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
1744   try_boundary->SetLocations(nullptr);
1745 }
1746 
1747 void InstructionCodeGeneratorX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
1748   HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
1749   if (!successor->IsExitBlock()) {
1750     HandleGoto(try_boundary, successor);
1751   }
1752 }
1753 
1754 void LocationsBuilderX86_64::VisitExit(HExit* exit) {
1755   exit->SetLocations(nullptr);
1756 }
1757 
1758 void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
1759 }
1760 
1761 template<class LabelType>
1762 void InstructionCodeGeneratorX86_64::GenerateFPJumps(HCondition* cond,
1763                                                      LabelType* true_label,
1764                                                      LabelType* false_label) {
1765   if (cond->IsFPConditionTrueIfNaN()) {
1766     __ j(kUnordered, true_label);
1767   } else if (cond->IsFPConditionFalseIfNaN()) {
1768     __ j(kUnordered, false_label);
1769   }
1770   __ j(X86_64FPCondition(cond->GetCondition()), true_label);
1771 }
1772 
1773 void InstructionCodeGeneratorX86_64::GenerateCompareTest(HCondition* condition) {
1774   LocationSummary* locations = condition->GetLocations();
1775 
1776   Location left = locations->InAt(0);
1777   Location right = locations->InAt(1);
1778   DataType::Type type = condition->InputAt(0)->GetType();
1779   switch (type) {
1780     case DataType::Type::kBool:
1781     case DataType::Type::kUint8:
1782     case DataType::Type::kInt8:
1783     case DataType::Type::kUint16:
1784     case DataType::Type::kInt16:
1785     case DataType::Type::kInt32:
1786     case DataType::Type::kReference: {
1787       codegen_->GenerateIntCompare(left, right);
1788       break;
1789     }
1790     case DataType::Type::kInt64: {
1791       codegen_->GenerateLongCompare(left, right);
1792       break;
1793     }
1794     case DataType::Type::kFloat32: {
1795       if (right.IsFpuRegister()) {
1796         __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
1797       } else if (right.IsConstant()) {
1798         __ ucomiss(left.AsFpuRegister<XmmRegister>(),
1799                    codegen_->LiteralFloatAddress(
1800                      right.GetConstant()->AsFloatConstant()->GetValue()));
1801       } else {
1802         DCHECK(right.IsStackSlot());
1803         __ ucomiss(left.AsFpuRegister<XmmRegister>(),
1804                    Address(CpuRegister(RSP), right.GetStackIndex()));
1805       }
1806       break;
1807     }
1808     case DataType::Type::kFloat64: {
1809       if (right.IsFpuRegister()) {
1810         __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
1811       } else if (right.IsConstant()) {
1812         __ ucomisd(left.AsFpuRegister<XmmRegister>(),
1813                    codegen_->LiteralDoubleAddress(
1814                      right.GetConstant()->AsDoubleConstant()->GetValue()));
1815       } else {
1816         DCHECK(right.IsDoubleStackSlot());
1817         __ ucomisd(left.AsFpuRegister<XmmRegister>(),
1818                    Address(CpuRegister(RSP), right.GetStackIndex()));
1819       }
1820       break;
1821     }
1822     default:
1823       LOG(FATAL) << "Unexpected condition type " << type;
1824   }
1825 }
1826 
1827 template<class LabelType>
1828 void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* condition,
1829                                                                   LabelType* true_target_in,
1830                                                                   LabelType* false_target_in) {
1831   // Generated branching requires both targets to be explicit. If either of the
1832   // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead.
1833   LabelType fallthrough_target;
1834   LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
1835   LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
1836 
1837   // Generate the comparison to set the CC.
1838   GenerateCompareTest(condition);
1839 
1840   // Now generate the correct jump(s).
1841   DataType::Type type = condition->InputAt(0)->GetType();
1842   switch (type) {
1843     case DataType::Type::kInt64: {
1844       __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
1845       break;
1846     }
1847     case DataType::Type::kFloat32: {
1848       GenerateFPJumps(condition, true_target, false_target);
1849       break;
1850     }
1851     case DataType::Type::kFloat64: {
1852       GenerateFPJumps(condition, true_target, false_target);
1853       break;
1854     }
1855     default:
1856       LOG(FATAL) << "Unexpected condition type " << type;
1857   }
1858 
1859   if (false_target != &fallthrough_target) {
1860     __ jmp(false_target);
1861   }
1862 
1863   if (fallthrough_target.IsLinked()) {
1864     __ Bind(&fallthrough_target);
1865   }
1866 }
1867 
1868 static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) {
1869   // Moves may affect the eflags register (move zero uses xorl), so the EFLAGS
1870   // are only reliable when the condition is the instruction immediately before `branch`.
1871   // We also can't reuse the eflags for FP conditions, whose materialization needs complex branching.
1872   return cond->IsCondition() &&
1873          cond->GetNext() == branch &&
1874          !DataType::IsFloatingPointType(cond->InputAt(0)->GetType());
1875 }
1876 
1877 template<class LabelType>
1878 void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruction,
1879                                                            size_t condition_input_index,
1880                                                            LabelType* true_target,
1881                                                            LabelType* false_target) {
1882   HInstruction* cond = instruction->InputAt(condition_input_index);
1883 
1884   if (true_target == nullptr && false_target == nullptr) {
1885     // Nothing to do. The code always falls through.
1886     return;
1887   } else if (cond->IsIntConstant()) {
1888     // Constant condition, statically compared against "true" (integer value 1).
1889     if (cond->AsIntConstant()->IsTrue()) {
1890       if (true_target != nullptr) {
1891         __ jmp(true_target);
1892       }
1893     } else {
1894       DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
1895       if (false_target != nullptr) {
1896         __ jmp(false_target);
1897       }
1898     }
1899     return;
1900   }
1901 
1902   // The following code generates these patterns:
1903   //  (1) true_target == nullptr && false_target != nullptr
1904   //        - opposite condition true => branch to false_target
1905   //  (2) true_target != nullptr && false_target == nullptr
1906   //        - condition true => branch to true_target
1907   //  (3) true_target != nullptr && false_target != nullptr
1908   //        - condition true => branch to true_target
1909   //        - branch to false_target
1910   if (IsBooleanValueOrMaterializedCondition(cond)) {
1911     if (AreEflagsSetFrom(cond, instruction)) {
1912       if (true_target == nullptr) {
1913         __ j(X86_64IntegerCondition(cond->AsCondition()->GetOppositeCondition()), false_target);
1914       } else {
1915         __ j(X86_64IntegerCondition(cond->AsCondition()->GetCondition()), true_target);
1916       }
1917     } else {
1918       // Materialized condition, compare against 0.
1919       Location lhs = instruction->GetLocations()->InAt(condition_input_index);
1920       if (lhs.IsRegister()) {
1921         __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
1922       } else {
1923         __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0));
1924       }
1925       if (true_target == nullptr) {
1926         __ j(kEqual, false_target);
1927       } else {
1928         __ j(kNotEqual, true_target);
1929       }
1930     }
1931   } else {
1932     // Condition has not been materialized, use its inputs as the
1933     // comparison and its condition as the branch condition.
1934     HCondition* condition = cond->AsCondition();
1935 
1936     // If this is a long or FP comparison that has been folded into
1937     // the HCondition, generate the comparison directly.
1938     DataType::Type type = condition->InputAt(0)->GetType();
1939     if (type == DataType::Type::kInt64 || DataType::IsFloatingPointType(type)) {
1940       GenerateCompareTestAndBranch(condition, true_target, false_target);
1941       return;
1942     }
1943 
1944     Location lhs = condition->GetLocations()->InAt(0);
1945     Location rhs = condition->GetLocations()->InAt(1);
1946     codegen_->GenerateIntCompare(lhs, rhs);
1947     if (true_target == nullptr) {
1948       __ j(X86_64IntegerCondition(condition->GetOppositeCondition()), false_target);
1949     } else {
1950       __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
1951     }
1952   }
1953 
1954   // If neither branch falls through (case 3), the conditional branch to `true_target`
1955   // was already emitted (case 2) and we need to emit a jump to `false_target`.
1956   if (true_target != nullptr && false_target != nullptr) {
1957     __ jmp(false_target);
1958   }
1959 }
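// Example of pattern (1) above: for "if (a < b)" with the condition emitted at its use site and
// the true successor being the fall-through block, a single "cmpl; jge false_target" pair is
// emitted, jge being the opposite of the jl that the condition itself would use.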
1960 
1961 void LocationsBuilderX86_64::VisitIf(HIf* if_instr) {
1962   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
1963   if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
1964     locations->SetInAt(0, Location::Any());
1965   }
1966 }
1967 
1968 void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
1969   HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
1970   HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
1971   Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
1972       nullptr : codegen_->GetLabelOf(true_successor);
1973   Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
1974       nullptr : codegen_->GetLabelOf(false_successor);
1975   GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
1976 }
1977 
1978 void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
1979   LocationSummary* locations = new (GetGraph()->GetAllocator())
1980       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
1981   InvokeRuntimeCallingConvention calling_convention;
1982   RegisterSet caller_saves = RegisterSet::Empty();
1983   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1984   locations->SetCustomSlowPathCallerSaves(caller_saves);
1985   if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
1986     locations->SetInAt(0, Location::Any());
1987   }
1988 }
1989 
1990 void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
1991   SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86_64>(deoptimize);
1992   GenerateTestAndBranch<Label>(deoptimize,
1993                                /* condition_input_index= */ 0,
1994                                slow_path->GetEntryLabel(),
1995                                /* false_target= */ nullptr);
1996 }
1997 
1998 void LocationsBuilderX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
1999   LocationSummary* locations = new (GetGraph()->GetAllocator())
2000       LocationSummary(flag, LocationSummary::kNoCall);
2001   locations->SetOut(Location::RequiresRegister());
2002 }
2003 
2004 void InstructionCodeGeneratorX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
2005   __ movl(flag->GetLocations()->Out().AsRegister<CpuRegister>(),
2006           Address(CpuRegister(RSP), codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
2007 }
2008 
2009 static bool SelectCanUseCMOV(HSelect* select) {
2010   // There are no conditional move instructions for XMMs.
2011   if (DataType::IsFloatingPointType(select->GetType())) {
2012     return false;
2013   }
2014 
2015   // An FP condition doesn't produce the single condition code that we need.
2016   HInstruction* condition = select->GetCondition();
2017   if (condition->IsCondition() &&
2018       DataType::IsFloatingPointType(condition->InputAt(0)->GetType())) {
2019     return false;
2020   }
2021 
2022   // We can generate a CMOV for this Select.
2023   return true;
2024 }
2025 
2026 void LocationsBuilderX86_64::VisitSelect(HSelect* select) {
2027   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select);
2028   if (DataType::IsFloatingPointType(select->GetType())) {
2029     locations->SetInAt(0, Location::RequiresFpuRegister());
2030     locations->SetInAt(1, Location::Any());
2031   } else {
2032     locations->SetInAt(0, Location::RequiresRegister());
2033     if (SelectCanUseCMOV(select)) {
2034       if (select->InputAt(1)->IsConstant()) {
2035         locations->SetInAt(1, Location::RequiresRegister());
2036       } else {
2037         locations->SetInAt(1, Location::Any());
2038       }
2039     } else {
2040       locations->SetInAt(1, Location::Any());
2041     }
2042   }
2043   if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
2044     locations->SetInAt(2, Location::RequiresRegister());
2045   }
2046   locations->SetOut(Location::SameAsFirstInput());
2047 }
2048 
2049 void InstructionCodeGeneratorX86_64::VisitSelect(HSelect* select) {
2050   LocationSummary* locations = select->GetLocations();
2051   if (SelectCanUseCMOV(select)) {
2052     // If both the condition and the source types are integer, we can generate
2053     // a CMOV to implement Select.
2054     CpuRegister value_false = locations->InAt(0).AsRegister<CpuRegister>();
2055     Location value_true_loc = locations->InAt(1);
2056     DCHECK(locations->InAt(0).Equals(locations->Out()));
2057 
2058     HInstruction* select_condition = select->GetCondition();
2059     Condition cond = kNotEqual;
2060 
2061     // Figure out how to test the 'condition'.
2062     if (select_condition->IsCondition()) {
2063       HCondition* condition = select_condition->AsCondition();
2064       if (!condition->IsEmittedAtUseSite()) {
2065         // This was a previously materialized condition.
2066         // Can we use the existing condition code?
2067         if (AreEflagsSetFrom(condition, select)) {
2068           // Materialization was the previous instruction.  Condition codes are right.
2069           cond = X86_64IntegerCondition(condition->GetCondition());
2070         } else {
2071           // No, we have to recreate the condition code.
2072           CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
2073           __ testl(cond_reg, cond_reg);
2074         }
2075       } else {
2076         GenerateCompareTest(condition);
2077         cond = X86_64IntegerCondition(condition->GetCondition());
2078       }
2079     } else {
2080       // Must be a Boolean condition, which needs to be compared to 0.
2081       CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
2082       __ testl(cond_reg, cond_reg);
2083     }
2084 
2085     // If the condition is true, overwrite the output, which already contains false.
2086     // Generate the correct sized CMOV.
2087     bool is_64_bit = DataType::Is64BitType(select->GetType());
2088     if (value_true_loc.IsRegister()) {
2089       __ cmov(cond, value_false, value_true_loc.AsRegister<CpuRegister>(), is_64_bit);
2090     } else {
2091       __ cmov(cond,
2092               value_false,
2093               Address(CpuRegister(RSP), value_true_loc.GetStackIndex()), is_64_bit);
2094     }
2095   } else {
2096     NearLabel false_target;
2097     GenerateTestAndBranch<NearLabel>(select,
2098                                      /* condition_input_index= */ 2,
2099                                      /* true_target= */ nullptr,
2100                                      &false_target);
2101     codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
2102     __ Bind(&false_target);
2103   }
2104 }
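// CMOV lowering example for an integer "x = cond ? t : f" (register names illustrative): the
// output is the same register as the first input and so already holds f; only the "condition
// true" case needs a write:
//
//   testl cond_reg, cond_reg    ; or reuse EFLAGS from the immediately preceding compare
//   cmovnz out, t_reg           ; 64-bit form when the select produces a long
//
// When CMOV is not applicable (FP values or FP conditions), the fallback branches around a
// plain move instead.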
2105 
2106 void LocationsBuilderX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
2107   new (GetGraph()->GetAllocator()) LocationSummary(info);
2108 }
2109 
2110 void InstructionCodeGeneratorX86_64::VisitNativeDebugInfo(HNativeDebugInfo*) {
2111   // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
2112 }
2113 
2114 void CodeGeneratorX86_64::IncreaseFrame(size_t adjustment) {
2115   __ subq(CpuRegister(RSP), Immediate(adjustment));
2116   __ cfi().AdjustCFAOffset(adjustment);
2117 }
2118 
2119 void CodeGeneratorX86_64::DecreaseFrame(size_t adjustment) {
2120   __ addq(CpuRegister(RSP), Immediate(adjustment));
2121   __ cfi().AdjustCFAOffset(-adjustment);
2122 }
2123 
2124 void CodeGeneratorX86_64::GenerateNop() {
2125   __ nop();
2126 }
2127 
2128 void LocationsBuilderX86_64::HandleCondition(HCondition* cond) {
2129   LocationSummary* locations =
2130       new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall);
2131   // Handle the long/FP comparisons made in instruction simplification.
2132   switch (cond->InputAt(0)->GetType()) {
2133     case DataType::Type::kInt64:
2134       locations->SetInAt(0, Location::RequiresRegister());
2135       locations->SetInAt(1, Location::Any());
2136       break;
2137     case DataType::Type::kFloat32:
2138     case DataType::Type::kFloat64:
2139       locations->SetInAt(0, Location::RequiresFpuRegister());
2140       locations->SetInAt(1, Location::Any());
2141       break;
2142     default:
2143       locations->SetInAt(0, Location::RequiresRegister());
2144       locations->SetInAt(1, Location::Any());
2145       break;
2146   }
2147   if (!cond->IsEmittedAtUseSite()) {
2148     locations->SetOut(Location::RequiresRegister());
2149   }
2150 }
2151 
2152 void InstructionCodeGeneratorX86_64::HandleCondition(HCondition* cond) {
2153   if (cond->IsEmittedAtUseSite()) {
2154     return;
2155   }
2156 
2157   LocationSummary* locations = cond->GetLocations();
2158   Location lhs = locations->InAt(0);
2159   Location rhs = locations->InAt(1);
2160   CpuRegister reg = locations->Out().AsRegister<CpuRegister>();
2161   NearLabel true_label, false_label;
2162 
2163   switch (cond->InputAt(0)->GetType()) {
2164     default:
2165       // Integer case.
2166 
2167       // Clear output register: setcc only sets the low byte.
2168       __ xorl(reg, reg);
2169 
2170       codegen_->GenerateIntCompare(lhs, rhs);
2171       __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
2172       return;
2173     case DataType::Type::kInt64:
2174       // Clear output register: setcc only sets the low byte.
2175       __ xorl(reg, reg);
2176 
2177       codegen_->GenerateLongCompare(lhs, rhs);
2178       __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
2179       return;
2180     case DataType::Type::kFloat32: {
2181       XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
2182       if (rhs.IsConstant()) {
2183         float value = rhs.GetConstant()->AsFloatConstant()->GetValue();
2184         __ ucomiss(lhs_reg, codegen_->LiteralFloatAddress(value));
2185       } else if (rhs.IsStackSlot()) {
2186         __ ucomiss(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
2187       } else {
2188         __ ucomiss(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
2189       }
2190       GenerateFPJumps(cond, &true_label, &false_label);
2191       break;
2192     }
2193     case DataType::Type::kFloat64: {
2194       XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
2195       if (rhs.IsConstant()) {
2196         double value = rhs.GetConstant()->AsDoubleConstant()->GetValue();
2197         __ ucomisd(lhs_reg, codegen_->LiteralDoubleAddress(value));
2198       } else if (rhs.IsDoubleStackSlot()) {
2199         __ ucomisd(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
2200       } else {
2201         __ ucomisd(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
2202       }
2203       GenerateFPJumps(cond, &true_label, &false_label);
2204       break;
2205     }
2206   }
2207 
2208   // Convert the jumps into the result.
2209   NearLabel done_label;
2210 
2211   // False case: result = 0.
2212   __ Bind(&false_label);
2213   __ xorl(reg, reg);
2214   __ jmp(&done_label);
2215 
2216   // True case: result = 1.
2217   __ Bind(&true_label);
2218   __ movl(reg, Immediate(1));
2219   __ Bind(&done_label);
2220 }
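// Materialization example for an integer "a == b" (a sketch; registers illustrative):
//
//   xorl out, out    ; clear first, setcc only writes the low byte
//   cmpl ...         ; GenerateIntCompare(lhs, rhs)
//   sete out
//
// FP conditions cannot be materialized with a single setcc because of the unordered (NaN)
// case, hence the jump-based true/false labels converted to 0/1 above.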
2221 
2222 void LocationsBuilderX86_64::VisitEqual(HEqual* comp) {
2223   HandleCondition(comp);
2224 }
2225 
2226 void InstructionCodeGeneratorX86_64::VisitEqual(HEqual* comp) {
2227   HandleCondition(comp);
2228 }
2229 
2230 void LocationsBuilderX86_64::VisitNotEqual(HNotEqual* comp) {
2231   HandleCondition(comp);
2232 }
2233 
2234 void InstructionCodeGeneratorX86_64::VisitNotEqual(HNotEqual* comp) {
2235   HandleCondition(comp);
2236 }
2237 
2238 void LocationsBuilderX86_64::VisitLessThan(HLessThan* comp) {
2239   HandleCondition(comp);
2240 }
2241 
2242 void InstructionCodeGeneratorX86_64::VisitLessThan(HLessThan* comp) {
2243   HandleCondition(comp);
2244 }
2245 
2246 void LocationsBuilderX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2247   HandleCondition(comp);
2248 }
2249 
2250 void InstructionCodeGeneratorX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2251   HandleCondition(comp);
2252 }
2253 
2254 void LocationsBuilderX86_64::VisitGreaterThan(HGreaterThan* comp) {
2255   HandleCondition(comp);
2256 }
2257 
2258 void InstructionCodeGeneratorX86_64::VisitGreaterThan(HGreaterThan* comp) {
2259   HandleCondition(comp);
2260 }
2261 
2262 void LocationsBuilderX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2263   HandleCondition(comp);
2264 }
2265 
2266 void InstructionCodeGeneratorX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2267   HandleCondition(comp);
2268 }
2269 
2270 void LocationsBuilderX86_64::VisitBelow(HBelow* comp) {
2271   HandleCondition(comp);
2272 }
2273 
2274 void InstructionCodeGeneratorX86_64::VisitBelow(HBelow* comp) {
2275   HandleCondition(comp);
2276 }
2277 
2278 void LocationsBuilderX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
2279   HandleCondition(comp);
2280 }
2281 
2282 void InstructionCodeGeneratorX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
2283   HandleCondition(comp);
2284 }
2285 
2286 void LocationsBuilderX86_64::VisitAbove(HAbove* comp) {
2287   HandleCondition(comp);
2288 }
2289 
2290 void InstructionCodeGeneratorX86_64::VisitAbove(HAbove* comp) {
2291   HandleCondition(comp);
2292 }
2293 
2294 void LocationsBuilderX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
2295   HandleCondition(comp);
2296 }
2297 
2298 void InstructionCodeGeneratorX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
2299   HandleCondition(comp);
2300 }
2301 
2302 void LocationsBuilderX86_64::VisitCompare(HCompare* compare) {
2303   LocationSummary* locations =
2304       new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
2305   switch (compare->InputAt(0)->GetType()) {
2306     case DataType::Type::kBool:
2307     case DataType::Type::kUint8:
2308     case DataType::Type::kInt8:
2309     case DataType::Type::kUint16:
2310     case DataType::Type::kInt16:
2311     case DataType::Type::kInt32:
2312     case DataType::Type::kInt64: {
2313       locations->SetInAt(0, Location::RequiresRegister());
2314       locations->SetInAt(1, Location::Any());
2315       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2316       break;
2317     }
2318     case DataType::Type::kFloat32:
2319     case DataType::Type::kFloat64: {
2320       locations->SetInAt(0, Location::RequiresFpuRegister());
2321       locations->SetInAt(1, Location::Any());
2322       locations->SetOut(Location::RequiresRegister());
2323       break;
2324     }
2325     default:
2326       LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
2327   }
2328 }
2329 
2330 void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) {
2331   LocationSummary* locations = compare->GetLocations();
2332   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
2333   Location left = locations->InAt(0);
2334   Location right = locations->InAt(1);
2335 
2336   NearLabel less, greater, done;
2337   DataType::Type type = compare->InputAt(0)->GetType();
2338   Condition less_cond = kLess;
2339 
2340   switch (type) {
2341     case DataType::Type::kBool:
2342     case DataType::Type::kUint8:
2343     case DataType::Type::kInt8:
2344     case DataType::Type::kUint16:
2345     case DataType::Type::kInt16:
2346     case DataType::Type::kInt32: {
2347       codegen_->GenerateIntCompare(left, right);
2348       break;
2349     }
2350     case DataType::Type::kInt64: {
2351       codegen_->GenerateLongCompare(left, right);
2352       break;
2353     }
2354     case DataType::Type::kFloat32: {
2355       XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
2356       if (right.IsConstant()) {
2357         float value = right.GetConstant()->AsFloatConstant()->GetValue();
2358         __ ucomiss(left_reg, codegen_->LiteralFloatAddress(value));
2359       } else if (right.IsStackSlot()) {
2360         __ ucomiss(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
2361       } else {
2362         __ ucomiss(left_reg, right.AsFpuRegister<XmmRegister>());
2363       }
2364       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
2365       less_cond = kBelow;  //  ucomis{s,d} sets CF
2366       break;
2367     }
2368     case DataType::Type::kFloat64: {
2369       XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
2370       if (right.IsConstant()) {
2371         double value = right.GetConstant()->AsDoubleConstant()->GetValue();
2372         __ ucomisd(left_reg, codegen_->LiteralDoubleAddress(value));
2373       } else if (right.IsDoubleStackSlot()) {
2374         __ ucomisd(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
2375       } else {
2376         __ ucomisd(left_reg, right.AsFpuRegister<XmmRegister>());
2377       }
2378       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
2379       less_cond = kBelow;  //  ucomis{s,d} sets CF
2380       break;
2381     }
2382     default:
2383       LOG(FATAL) << "Unexpected compare type " << type;
2384   }
2385 
2386   __ movl(out, Immediate(0));
2387   __ j(kEqual, &done);
2388   __ j(less_cond, &less);
2389 
2390   __ Bind(&greater);
2391   __ movl(out, Immediate(1));
2392   __ jmp(&done);
2393 
2394   __ Bind(&less);
2395   __ movl(out, Immediate(-1));
2396 
2397   __ Bind(&done);
2398 }
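// For reference, a sketch of what the block above emits for an int32 HCompare; the fp paths
// differ only in the initial compare (ucomiss/ucomisd), the extra kUnordered jump for NaN
// (biased to +1 or -1 by IsGtBias()), and the use of kBelow since ucomis{s,d} report
// "less than" through CF:
//
//     cmp   left, right          // GenerateIntCompare
//     movl  out, 0
//     je    done                 // equal      -> 0
//     jl    less                 // (jb for fp)
//   greater:                     // fall-through
//     movl  out, 1
//     jmp   done
//   less:
//     movl  out, -1
//   done: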
2399 
2400 void LocationsBuilderX86_64::VisitIntConstant(HIntConstant* constant) {
2401   LocationSummary* locations =
2402       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2403   locations->SetOut(Location::ConstantLocation(constant));
2404 }
2405 
2406 void InstructionCodeGeneratorX86_64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
2407   // Will be generated at use site.
2408 }
2409 
2410 void LocationsBuilderX86_64::VisitNullConstant(HNullConstant* constant) {
2411   LocationSummary* locations =
2412       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2413   locations->SetOut(Location::ConstantLocation(constant));
2414 }
2415 
2416 void InstructionCodeGeneratorX86_64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
2417   // Will be generated at use site.
2418 }
2419 
2420 void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) {
2421   LocationSummary* locations =
2422       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2423   locations->SetOut(Location::ConstantLocation(constant));
2424 }
2425 
2426 void InstructionCodeGeneratorX86_64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
2427   // Will be generated at use site.
2428 }
2429 
2430 void LocationsBuilderX86_64::VisitFloatConstant(HFloatConstant* constant) {
2431   LocationSummary* locations =
2432       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2433   locations->SetOut(Location::ConstantLocation(constant));
2434 }
2435 
2436 void InstructionCodeGeneratorX86_64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
2437   // Will be generated at use site.
2438 }
2439 
2440 void LocationsBuilderX86_64::VisitDoubleConstant(HDoubleConstant* constant) {
2441   LocationSummary* locations =
2442       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2443   locations->SetOut(Location::ConstantLocation(constant));
2444 }
2445 
2446 void InstructionCodeGeneratorX86_64::VisitDoubleConstant(
2447     HDoubleConstant* constant ATTRIBUTE_UNUSED) {
2448   // Will be generated at use site.
2449 }
2450 
2451 void LocationsBuilderX86_64::VisitConstructorFence(HConstructorFence* constructor_fence) {
2452   constructor_fence->SetLocations(nullptr);
2453 }
2454 
2455 void InstructionCodeGeneratorX86_64::VisitConstructorFence(
2456     HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
2457   codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2458 }
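// A constructor fence only has to order the object's field stores before the publication of
// its reference, hence the kStoreStore barrier above. On x86-64's TSO memory model this is
// expected to lower to no instruction at all in GenerateMemoryBarrier; only kAnyAny barriers
// need an actual fence on this architecture.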
2459 
2460 void LocationsBuilderX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2461   memory_barrier->SetLocations(nullptr);
2462 }
2463 
2464 void InstructionCodeGeneratorX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2465   codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
2466 }
2467 
2468 void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) {
2469   ret->SetLocations(nullptr);
2470 }
2471 
2472 void InstructionCodeGeneratorX86_64::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) {
2473   codegen_->GenerateFrameExit();
2474 }
2475 
2476 void LocationsBuilderX86_64::VisitReturn(HReturn* ret) {
2477   LocationSummary* locations =
2478       new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall);
2479   switch (ret->InputAt(0)->GetType()) {
2480     case DataType::Type::kReference:
2481     case DataType::Type::kBool:
2482     case DataType::Type::kUint8:
2483     case DataType::Type::kInt8:
2484     case DataType::Type::kUint16:
2485     case DataType::Type::kInt16:
2486     case DataType::Type::kInt32:
2487     case DataType::Type::kInt64:
2488       locations->SetInAt(0, Location::RegisterLocation(RAX));
2489       break;
2490 
2491     case DataType::Type::kFloat32:
2492     case DataType::Type::kFloat64:
2493       locations->SetInAt(0, Location::FpuRegisterLocation(XMM0));
2494       break;
2495 
2496     default:
2497       LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
2498   }
2499 }
2500 
2501 void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) {
2502   switch (ret->InputAt(0)->GetType()) {
2503     case DataType::Type::kReference:
2504     case DataType::Type::kBool:
2505     case DataType::Type::kUint8:
2506     case DataType::Type::kInt8:
2507     case DataType::Type::kUint16:
2508     case DataType::Type::kInt16:
2509     case DataType::Type::kInt32:
2510     case DataType::Type::kInt64:
2511       DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<CpuRegister>().AsRegister(), RAX);
2512       break;
2513 
2514     case DataType::Type::kFloat32: {
2515       DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(),
2516                 XMM0);
2517       // To simplify callers of an OSR method, we put the return value in both
2518       // floating point and core register.
2519       if (GetGraph()->IsCompilingOsr()) {
2520         __ movd(CpuRegister(RAX), XmmRegister(XMM0), /* is64bit= */ false);
2521       }
2522       break;
2523     }
2524     case DataType::Type::kFloat64: {
2525       DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(),
2526                 XMM0);
2527       // To simplify callers of an OSR method, we put the return value in both
2528       // floating point and core register.
2529       if (GetGraph()->IsCompilingOsr()) {
2530         __ movd(CpuRegister(RAX), XmmRegister(XMM0), /* is64bit= */ true);
2531       }
2532       break;
2533     }
2534 
2535     default:
2536       LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
2537   }
2538   codegen_->GenerateFrameExit();
2539 }
2540 
2541 Location InvokeDexCallingConventionVisitorX86_64::GetReturnLocation(DataType::Type type) const {
2542   switch (type) {
2543     case DataType::Type::kReference:
2544     case DataType::Type::kBool:
2545     case DataType::Type::kUint8:
2546     case DataType::Type::kInt8:
2547     case DataType::Type::kUint16:
2548     case DataType::Type::kInt16:
2549     case DataType::Type::kUint32:
2550     case DataType::Type::kInt32:
2551     case DataType::Type::kUint64:
2552     case DataType::Type::kInt64:
2553       return Location::RegisterLocation(RAX);
2554 
2555     case DataType::Type::kVoid:
2556       return Location::NoLocation();
2557 
2558     case DataType::Type::kFloat64:
2559     case DataType::Type::kFloat32:
2560       return Location::FpuRegisterLocation(XMM0);
2561   }
2562 
2563   UNREACHABLE();
2564 }
2565 
2566 Location InvokeDexCallingConventionVisitorX86_64::GetMethodLocation() const {
2567   return Location::RegisterLocation(kMethodRegisterArgument);
2568 }
2569 
2570 Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(DataType::Type type) {
2571   switch (type) {
2572     case DataType::Type::kReference:
2573     case DataType::Type::kBool:
2574     case DataType::Type::kUint8:
2575     case DataType::Type::kInt8:
2576     case DataType::Type::kUint16:
2577     case DataType::Type::kInt16:
2578     case DataType::Type::kInt32: {
2579       uint32_t index = gp_index_++;
2580       stack_index_++;
2581       if (index < calling_convention.GetNumberOfRegisters()) {
2582         return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
2583       } else {
2584         return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
2585       }
2586     }
2587 
2588     case DataType::Type::kInt64: {
2589       uint32_t index = gp_index_;
2590       stack_index_ += 2;
2591       if (index < calling_convention.GetNumberOfRegisters()) {
2592         gp_index_ += 1;
2593         return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
2594       } else {
2595         gp_index_ += 2;
2596         return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
2597       }
2598     }
2599 
2600     case DataType::Type::kFloat32: {
2601       uint32_t index = float_index_++;
2602       stack_index_++;
2603       if (index < calling_convention.GetNumberOfFpuRegisters()) {
2604         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
2605       } else {
2606         return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
2607       }
2608     }
2609 
2610     case DataType::Type::kFloat64: {
2611       uint32_t index = float_index_++;
2612       stack_index_ += 2;
2613       if (index < calling_convention.GetNumberOfFpuRegisters()) {
2614         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
2615       } else {
2616         return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
2617       }
2618     }
2619 
2620     case DataType::Type::kUint32:
2621     case DataType::Type::kUint64:
2622     case DataType::Type::kVoid:
2623       LOG(FATAL) << "Unexpected parameter type " << type;
2624       UNREACHABLE();
2625   }
2626   return Location::NoLocation();
2627 }
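// A worked example of the assignment above for a hypothetical signature, assuming the managed
// x86-64 convention declared by this backend (ArtMethod* in RDI, integer/reference arguments
// in RSI, RDX, RCX, R8, R9, fp arguments in XMM0-XMM7, 8-byte stack slots for the rest):
//
//   void foo(int a, long b, float c, Object d)
//       a -> RSI, b -> RDX, c -> XMM0, d -> RCX
//
// gp_index_ and float_index_ advance independently, while stack_index_ counts every argument
// (twice for 64-bit ones) so that arguments that do not fit in registers land at the stack
// offsets the caller reserves for them.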
2628 
2629 Location CriticalNativeCallingConventionVisitorX86_64::GetNextLocation(DataType::Type type) {
2630   DCHECK_NE(type, DataType::Type::kReference);
2631 
2632   Location location = Location::NoLocation();
2633   if (DataType::IsFloatingPointType(type)) {
2634     if (fpr_index_ < kParameterFloatRegistersLength) {
2635       location = Location::FpuRegisterLocation(kParameterFloatRegisters[fpr_index_]);
2636       ++fpr_index_;
2637     }
2638   } else {
2639     // Native ABI uses the same registers as managed, except that the method register RDI
2640     // is a normal argument.
2641     if (gpr_index_ < 1u + kParameterCoreRegistersLength) {
2642       location = Location::RegisterLocation(
2643           gpr_index_ == 0u ? RDI : kParameterCoreRegisters[gpr_index_ - 1u]);
2644       ++gpr_index_;
2645     }
2646   }
2647   if (location.IsInvalid()) {
2648     if (DataType::Is64BitType(type)) {
2649       location = Location::DoubleStackSlot(stack_offset_);
2650     } else {
2651       location = Location::StackSlot(stack_offset_);
2652     }
2653     stack_offset_ += kFramePointerSize;
2654 
2655     if (for_register_allocation_) {
2656       location = Location::Any();
2657     }
2658   }
2659   return location;
2660 }
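// Compared with the managed convention above, a @CriticalNative call follows the native ABI:
// there is no ArtMethod* in RDI, so RDI becomes the first integer argument register and the
// remaining GPR arguments shift by one, while fp arguments keep using the XMM parameter
// registers. A sketch for a hypothetical signature:
//
//   static native int nativeOp(int a, long b, double c)
//       a -> RDI, b -> RSI, c -> XMM0
//
// With for_register_allocation_ set, stack arguments are relaxed to Location::Any(); the
// actual stack placement is handled when the call itself is emitted.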
2661 
2662 Location CriticalNativeCallingConventionVisitorX86_64::GetReturnLocation(DataType::Type type)
2663     const {
2664   // We perform conversion to the managed ABI return register after the call if needed.
2665   InvokeDexCallingConventionVisitorX86_64 dex_calling_convention;
2666   return dex_calling_convention.GetReturnLocation(type);
2667 }
2668 
2669 Location CriticalNativeCallingConventionVisitorX86_64::GetMethodLocation() const {
2670   // Pass the method in the hidden argument RAX.
2671   return Location::RegisterLocation(RAX);
2672 }
2673 
2674 void LocationsBuilderX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2675   // The trampoline uses the same calling convention as dex calling conventions,
2676   // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
2677   // the method_idx.
2678   HandleInvoke(invoke);
2679 }
2680 
2681 void InstructionCodeGeneratorX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2682   codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
2683 }
2684 
2685 void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2686   // Explicit clinit checks triggered by static invokes must have been pruned by
2687   // art::PrepareForRegisterAllocation.
2688   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2689 
2690   IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
2691   if (intrinsic.TryDispatch(invoke)) {
2692     return;
2693   }
2694 
2695   if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
2696     CriticalNativeCallingConventionVisitorX86_64 calling_convention_visitor(
2697         /*for_register_allocation=*/ true);
2698     CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
2699     CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(invoke->GetLocations());
2700   } else {
2701     HandleInvoke(invoke);
2702   }
2703 }
2704 
2705 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
2706   if (invoke->GetLocations()->Intrinsified()) {
2707     IntrinsicCodeGeneratorX86_64 intrinsic(codegen);
2708     intrinsic.Dispatch(invoke);
2709     return true;
2710   }
2711   return false;
2712 }
2713 
2714 void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2715   // Explicit clinit checks triggered by static invokes must have been pruned by
2716   // art::PrepareForRegisterAllocation.
2717   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2718 
2719   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2720     return;
2721   }
2722 
2723   LocationSummary* locations = invoke->GetLocations();
2724   codegen_->GenerateStaticOrDirectCall(
2725       invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
2726 }
2727 
2728 void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) {
2729   InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
2730   CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
2731 }
2732 
2733 void LocationsBuilderX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2734   IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
2735   if (intrinsic.TryDispatch(invoke)) {
2736     return;
2737   }
2738 
2739   HandleInvoke(invoke);
2740 }
2741 
2742 void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2743   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2744     return;
2745   }
2746 
2747   codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
2748   DCHECK(!codegen_->IsLeafMethod());
2749 }
2750 
2751 void LocationsBuilderX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
2752   HandleInvoke(invoke);
2753   // Add the hidden argument.
2754   if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
2755     invoke->GetLocations()->SetInAt(invoke->GetNumberOfArguments() - 1,
2756                                     Location::RegisterLocation(RAX));
2757   }
2758   invoke->GetLocations()->AddTemp(Location::RegisterLocation(RAX));
2759 }
2760 
2761 void CodeGeneratorX86_64::MaybeGenerateInlineCacheCheck(HInstruction* instruction,
2762                                                         CpuRegister klass) {
2763   DCHECK_EQ(RDI, klass.AsRegister());
2764   // We know the destination of an intrinsic, so no need to record inline
2765   // caches.
2766   if (!instruction->GetLocations()->Intrinsified() &&
2767       GetGraph()->IsCompilingBaseline() &&
2768       !Runtime::Current()->IsAotCompiler()) {
2769     ScopedProfilingInfoUse spiu(
2770         Runtime::Current()->GetJit(), GetGraph()->GetArtMethod(), Thread::Current());
2771     ProfilingInfo* info = spiu.GetProfilingInfo();
2772     if (info != nullptr) {
2773       InlineCache* cache = info->GetInlineCache(instruction->GetDexPc());
2774       uint64_t address = reinterpret_cast64<uint64_t>(cache);
2775       NearLabel done;
2776       __ movq(CpuRegister(TMP), Immediate(address));
2777       // Fast path for a monomorphic cache.
2778       __ cmpl(Address(CpuRegister(TMP), InlineCache::ClassesOffset().Int32Value()), klass);
2779       __ j(kEqual, &done);
2780       GenerateInvokeRuntime(
2781           GetThreadOffset<kX86_64PointerSize>(kQuickUpdateInlineCache).Int32Value());
2782       __ Bind(&done);
2783     }
2784   }
2785 }
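// A sketch of the sequence emitted above when compiling baseline JIT code with a valid
// ProfilingInfo:
//
//     movq  TMP, <address of the InlineCache for this dex pc>
//     cmpl  [TMP + InlineCache::ClassesOffset()], klass   // monomorphic fast path
//     je    done
//     call  pUpdateInlineCache                            // thread entrypoint records the class
//   done:
//
// The receiver class is required to be in RDI (see the DCHECK above), presumably because the
// UpdateInlineCache entrypoint reads it from there.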
2786 
2787 void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
2788   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
2789   LocationSummary* locations = invoke->GetLocations();
2790   CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
2791   Location receiver = locations->InAt(0);
2792   size_t class_offset = mirror::Object::ClassOffset().SizeValue();
2793 
2794   if (receiver.IsStackSlot()) {
2795     __ movl(temp, Address(CpuRegister(RSP), receiver.GetStackIndex()));
2796     // /* HeapReference<Class> */ temp = temp->klass_
2797     __ movl(temp, Address(temp, class_offset));
2798   } else {
2799     // /* HeapReference<Class> */ temp = receiver->klass_
2800     __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset));
2801   }
2802   codegen_->MaybeRecordImplicitNullCheck(invoke);
2803   // Instead of simply (possibly) unpoisoning `temp` here, we should
2804   // emit a read barrier for the previous class reference load.
2805   // However, this is not required in practice, as this is an
2806   // intermediate/temporary reference and because the current
2807   // concurrent copying collector keeps the from-space memory
2808   // intact/accessible until the end of the marking phase (a
2809   // future collector may not keep that guarantee).
2810   __ MaybeUnpoisonHeapReference(temp);
2811 
2812   codegen_->MaybeGenerateInlineCacheCheck(invoke, temp);
2813 
2814   if (invoke->GetHiddenArgumentLoadKind() != MethodLoadKind::kRecursive &&
2815       invoke->GetHiddenArgumentLoadKind() != MethodLoadKind::kRuntimeCall) {
2816     Location hidden_reg = locations->GetTemp(1);
2817     // Set the hidden argument. It is safe to do so here, as RAX
2818     // won't be modified thereafter, before the `call` instruction.
2819     // We also do it after MaybeGenerateInlineCacheCheck, which may use RAX.
2820     DCHECK_EQ(RAX, hidden_reg.AsRegister<Register>());
2821     codegen_->LoadMethod(invoke->GetHiddenArgumentLoadKind(), hidden_reg, invoke);
2822   }
2823 
2824   // temp = temp->GetAddressOfIMT()
2825   __ movq(temp,
2826       Address(temp, mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
2828   uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
2829       invoke->GetImtIndex(), kX86_64PointerSize));
2830   // temp = temp->GetImtEntryAt(method_offset);
2831   __ movq(temp, Address(temp, method_offset));
2832   if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRuntimeCall) {
2833     // We pass the method from the IMT in case of a conflict. This will ensure
2834     // we go into the runtime to resolve the actual method.
2835     Location hidden_reg = locations->GetTemp(1);
2836     __ movq(hidden_reg.AsRegister<CpuRegister>(), temp);
2837   }
2838   // call temp->GetEntryPoint();
2839   __ call(Address(
2840       temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64PointerSize).SizeValue()));
2841 
2842   DCHECK(!codegen_->IsLeafMethod());
2843   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2844 }
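// Summarizing the interface dispatch emitted above (offsets symbolic, hidden-argument setup
// for the non-runtime-call load kinds omitted):
//
//     movl  temp, [receiver + class_offset]                   // temp = receiver->klass_
//     movq  temp, [temp + ImtPtrOffset]                       // temp = klass->imt_
//     movq  temp, [temp + OffsetOfElement(imt_index)]         // IMT entry: method or conflict method
//     movq  RAX, temp                                         // hidden arg (kRuntimeCall kind only)
//     call  [temp + EntryPointFromQuickCompiledCodeOffset]
//
// On an IMT conflict the entry's code is expected to use the hidden argument to resolve the
// actual interface method at runtime, as the comment above notes.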
2845 
2846 void LocationsBuilderX86_64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2847   IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
2848   if (intrinsic.TryDispatch(invoke)) {
2849     return;
2850   }
2851   HandleInvoke(invoke);
2852 }
2853 
2854 void InstructionCodeGeneratorX86_64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2855   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2856     return;
2857   }
2858   codegen_->GenerateInvokePolymorphicCall(invoke);
2859 }
2860 
2861 void LocationsBuilderX86_64::VisitInvokeCustom(HInvokeCustom* invoke) {
2862   HandleInvoke(invoke);
2863 }
2864 
2865 void InstructionCodeGeneratorX86_64::VisitInvokeCustom(HInvokeCustom* invoke) {
2866   codegen_->GenerateInvokeCustomCall(invoke);
2867 }
2868 
2869 void LocationsBuilderX86_64::VisitNeg(HNeg* neg) {
2870   LocationSummary* locations =
2871       new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
2872   switch (neg->GetResultType()) {
2873     case DataType::Type::kInt32:
2874     case DataType::Type::kInt64:
2875       locations->SetInAt(0, Location::RequiresRegister());
2876       locations->SetOut(Location::SameAsFirstInput());
2877       break;
2878 
2879     case DataType::Type::kFloat32:
2880     case DataType::Type::kFloat64:
2881       locations->SetInAt(0, Location::RequiresFpuRegister());
2882       locations->SetOut(Location::SameAsFirstInput());
2883       locations->AddTemp(Location::RequiresFpuRegister());
2884       break;
2885 
2886     default:
2887       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2888   }
2889 }
2890 
2891 void InstructionCodeGeneratorX86_64::VisitNeg(HNeg* neg) {
2892   LocationSummary* locations = neg->GetLocations();
2893   Location out = locations->Out();
2894   Location in = locations->InAt(0);
2895   switch (neg->GetResultType()) {
2896     case DataType::Type::kInt32:
2897       DCHECK(in.IsRegister());
2898       DCHECK(in.Equals(out));
2899       __ negl(out.AsRegister<CpuRegister>());
2900       break;
2901 
2902     case DataType::Type::kInt64:
2903       DCHECK(in.IsRegister());
2904       DCHECK(in.Equals(out));
2905       __ negq(out.AsRegister<CpuRegister>());
2906       break;
2907 
2908     case DataType::Type::kFloat32: {
2909       DCHECK(in.Equals(out));
2910       XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2911       // Implement float negation with an exclusive or with value
2912       // 0x80000000 (mask for bit 31, representing the sign of a
2913       // single-precision floating-point number).
2914       __ movss(mask, codegen_->LiteralInt32Address(0x80000000));
2915       __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
2916       break;
2917     }
2918 
2919     case DataType::Type::kFloat64: {
2920       DCHECK(in.Equals(out));
2921       XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2922       // Implement double negation with an exclusive or with value
2923       // 0x8000000000000000 (mask for bit 63, representing the sign of
2924       // a double-precision floating-point number).
2925       __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000)));
2926       __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
2927       break;
2928     }
2929 
2930     default:
2931       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2932   }
2933 }
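// The xorps/xorpd above simply flips the IEEE-754 sign bit. A minimal illustration of the same
// operation on raw bits (for reference only, not compiled here):
//
//   // 2.0f == 0x40000000, and 0x40000000 ^ 0x80000000 == 0xC0000000 == -2.0f
//   float NegateFloat(float f) {
//     uint32_t bits;
//     memcpy(&bits, &f, sizeof(bits));  // needs <cstring>/<cstdint>
//     bits ^= 0x80000000u;              // flip bit 31, the sign bit
//     memcpy(&f, &bits, sizeof(f));
//     return f;
//   }
//
// Unlike a subtraction from zero, this also turns +0.0 into -0.0, which is what Java requires
// for float/double negation.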
2934 
2935 void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) {
2936   LocationSummary* locations =
2937       new (GetGraph()->GetAllocator()) LocationSummary(conversion, LocationSummary::kNoCall);
2938   DataType::Type result_type = conversion->GetResultType();
2939   DataType::Type input_type = conversion->GetInputType();
2940   DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
2941       << input_type << " -> " << result_type;
2942 
2943   switch (result_type) {
2944     case DataType::Type::kUint8:
2945     case DataType::Type::kInt8:
2946     case DataType::Type::kUint16:
2947     case DataType::Type::kInt16:
2948       DCHECK(DataType::IsIntegralType(input_type)) << input_type;
2949       locations->SetInAt(0, Location::Any());
2950       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2951       break;
2952 
2953     case DataType::Type::kInt32:
2954       switch (input_type) {
2955         case DataType::Type::kInt64:
2956           locations->SetInAt(0, Location::Any());
2957           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2958           break;
2959 
2960         case DataType::Type::kFloat32:
2961           locations->SetInAt(0, Location::RequiresFpuRegister());
2962           locations->SetOut(Location::RequiresRegister());
2963           break;
2964 
2965         case DataType::Type::kFloat64:
2966           locations->SetInAt(0, Location::RequiresFpuRegister());
2967           locations->SetOut(Location::RequiresRegister());
2968           break;
2969 
2970         default:
2971           LOG(FATAL) << "Unexpected type conversion from " << input_type
2972                      << " to " << result_type;
2973       }
2974       break;
2975 
2976     case DataType::Type::kInt64:
2977       switch (input_type) {
2978         case DataType::Type::kBool:
2979         case DataType::Type::kUint8:
2980         case DataType::Type::kInt8:
2981         case DataType::Type::kUint16:
2982         case DataType::Type::kInt16:
2983         case DataType::Type::kInt32:
2984           // TODO: We would benefit from a (to-be-implemented)
2985           // Location::RegisterOrStackSlot requirement for this input.
2986           locations->SetInAt(0, Location::RequiresRegister());
2987           locations->SetOut(Location::RequiresRegister());
2988           break;
2989 
2990         case DataType::Type::kFloat32:
2991           locations->SetInAt(0, Location::RequiresFpuRegister());
2992           locations->SetOut(Location::RequiresRegister());
2993           break;
2994 
2995         case DataType::Type::kFloat64:
2996           locations->SetInAt(0, Location::RequiresFpuRegister());
2997           locations->SetOut(Location::RequiresRegister());
2998           break;
2999 
3000         default:
3001           LOG(FATAL) << "Unexpected type conversion from " << input_type
3002                      << " to " << result_type;
3003       }
3004       break;
3005 
3006     case DataType::Type::kFloat32:
3007       switch (input_type) {
3008         case DataType::Type::kBool:
3009         case DataType::Type::kUint8:
3010         case DataType::Type::kInt8:
3011         case DataType::Type::kUint16:
3012         case DataType::Type::kInt16:
3013         case DataType::Type::kInt32:
3014           locations->SetInAt(0, Location::Any());
3015           locations->SetOut(Location::RequiresFpuRegister());
3016           break;
3017 
3018         case DataType::Type::kInt64:
3019           locations->SetInAt(0, Location::Any());
3020           locations->SetOut(Location::RequiresFpuRegister());
3021           break;
3022 
3023         case DataType::Type::kFloat64:
3024           locations->SetInAt(0, Location::Any());
3025           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3026           break;
3027 
3028         default:
3029           LOG(FATAL) << "Unexpected type conversion from " << input_type
3030                      << " to " << result_type;
3031       }
3032       break;
3033 
3034     case DataType::Type::kFloat64:
3035       switch (input_type) {
3036         case DataType::Type::kBool:
3037         case DataType::Type::kUint8:
3038         case DataType::Type::kInt8:
3039         case DataType::Type::kUint16:
3040         case DataType::Type::kInt16:
3041         case DataType::Type::kInt32:
3042           locations->SetInAt(0, Location::Any());
3043           locations->SetOut(Location::RequiresFpuRegister());
3044           break;
3045 
3046         case DataType::Type::kInt64:
3047           locations->SetInAt(0, Location::Any());
3048           locations->SetOut(Location::RequiresFpuRegister());
3049           break;
3050 
3051         case DataType::Type::kFloat32:
3052           locations->SetInAt(0, Location::Any());
3053           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3054           break;
3055 
3056         default:
3057           LOG(FATAL) << "Unexpected type conversion from " << input_type
3058                      << " to " << result_type;
3059       }
3060       break;
3061 
3062     default:
3063       LOG(FATAL) << "Unexpected type conversion from " << input_type
3064                  << " to " << result_type;
3065   }
3066 }
3067 
3068 void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conversion) {
3069   LocationSummary* locations = conversion->GetLocations();
3070   Location out = locations->Out();
3071   Location in = locations->InAt(0);
3072   DataType::Type result_type = conversion->GetResultType();
3073   DataType::Type input_type = conversion->GetInputType();
3074   DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
3075       << input_type << " -> " << result_type;
3076   switch (result_type) {
3077     case DataType::Type::kUint8:
3078       switch (input_type) {
3079         case DataType::Type::kInt8:
3080         case DataType::Type::kUint16:
3081         case DataType::Type::kInt16:
3082         case DataType::Type::kInt32:
3083         case DataType::Type::kInt64:
3084           if (in.IsRegister()) {
3085             __ movzxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3086           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
3087             __ movzxb(out.AsRegister<CpuRegister>(),
3088                       Address(CpuRegister(RSP), in.GetStackIndex()));
3089           } else {
3090             __ movl(out.AsRegister<CpuRegister>(),
3091                     Immediate(static_cast<uint8_t>(Int64FromConstant(in.GetConstant()))));
3092           }
3093           break;
3094 
3095         default:
3096           LOG(FATAL) << "Unexpected type conversion from " << input_type
3097                      << " to " << result_type;
3098       }
3099       break;
3100 
3101     case DataType::Type::kInt8:
3102       switch (input_type) {
3103         case DataType::Type::kUint8:
3104         case DataType::Type::kUint16:
3105         case DataType::Type::kInt16:
3106         case DataType::Type::kInt32:
3107         case DataType::Type::kInt64:
3108           if (in.IsRegister()) {
3109             __ movsxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3110           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
3111             __ movsxb(out.AsRegister<CpuRegister>(),
3112                       Address(CpuRegister(RSP), in.GetStackIndex()));
3113           } else {
3114             __ movl(out.AsRegister<CpuRegister>(),
3115                     Immediate(static_cast<int8_t>(Int64FromConstant(in.GetConstant()))));
3116           }
3117           break;
3118 
3119         default:
3120           LOG(FATAL) << "Unexpected type conversion from " << input_type
3121                      << " to " << result_type;
3122       }
3123       break;
3124 
3125     case DataType::Type::kUint16:
3126       switch (input_type) {
3127         case DataType::Type::kInt8:
3128         case DataType::Type::kInt16:
3129         case DataType::Type::kInt32:
3130         case DataType::Type::kInt64:
3131           if (in.IsRegister()) {
3132             __ movzxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3133           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
3134             __ movzxw(out.AsRegister<CpuRegister>(),
3135                       Address(CpuRegister(RSP), in.GetStackIndex()));
3136           } else {
3137             __ movl(out.AsRegister<CpuRegister>(),
3138                     Immediate(static_cast<uint16_t>(Int64FromConstant(in.GetConstant()))));
3139           }
3140           break;
3141 
3142         default:
3143           LOG(FATAL) << "Unexpected type conversion from " << input_type
3144                      << " to " << result_type;
3145       }
3146       break;
3147 
3148     case DataType::Type::kInt16:
3149       switch (input_type) {
3150         case DataType::Type::kUint16:
3151         case DataType::Type::kInt32:
3152         case DataType::Type::kInt64:
3153           if (in.IsRegister()) {
3154             __ movsxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3155           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
3156             __ movsxw(out.AsRegister<CpuRegister>(),
3157                       Address(CpuRegister(RSP), in.GetStackIndex()));
3158           } else {
3159             __ movl(out.AsRegister<CpuRegister>(),
3160                     Immediate(static_cast<int16_t>(Int64FromConstant(in.GetConstant()))));
3161           }
3162           break;
3163 
3164         default:
3165           LOG(FATAL) << "Unexpected type conversion from " << input_type
3166                      << " to " << result_type;
3167       }
3168       break;
3169 
3170     case DataType::Type::kInt32:
3171       switch (input_type) {
3172         case DataType::Type::kInt64:
3173           if (in.IsRegister()) {
3174             __ movl(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3175           } else if (in.IsDoubleStackSlot()) {
3176             __ movl(out.AsRegister<CpuRegister>(),
3177                     Address(CpuRegister(RSP), in.GetStackIndex()));
3178           } else {
3179             DCHECK(in.IsConstant());
3180             DCHECK(in.GetConstant()->IsLongConstant());
3181             int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3182             __ movl(out.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
3183           }
3184           break;
3185 
3186         case DataType::Type::kFloat32: {
3187           XmmRegister input = in.AsFpuRegister<XmmRegister>();
3188           CpuRegister output = out.AsRegister<CpuRegister>();
3189           NearLabel done, nan;
3190 
3191           __ movl(output, Immediate(kPrimIntMax));
3192           // if input >= (float)INT_MAX goto done
3193           __ comiss(input, codegen_->LiteralFloatAddress(static_cast<float>(kPrimIntMax)));
3194           __ j(kAboveEqual, &done);
3195           // if input == NaN goto nan
3196           __ j(kUnordered, &nan);
3197           // output = float-to-int-truncate(input)
3198           __ cvttss2si(output, input, false);
3199           __ jmp(&done);
3200           __ Bind(&nan);
3201           //  output = 0
3202           __ xorl(output, output);
3203           __ Bind(&done);
3204           break;
3205         }
3206 
3207         case DataType::Type::kFloat64: {
3208           XmmRegister input = in.AsFpuRegister<XmmRegister>();
3209           CpuRegister output = out.AsRegister<CpuRegister>();
3210           NearLabel done, nan;
3211 
3212           __ movl(output, Immediate(kPrimIntMax));
3213           // if input >= (double)INT_MAX goto done
3214           __ comisd(input, codegen_->LiteralDoubleAddress(kPrimIntMax));
3215           __ j(kAboveEqual, &done);
3216           // if input == NaN goto nan
3217           __ j(kUnordered, &nan);
3218           // output = double-to-int-truncate(input)
3219           __ cvttsd2si(output, input);
3220           __ jmp(&done);
3221           __ Bind(&nan);
3222           //  output = 0
3223           __ xorl(output, output);
3224           __ Bind(&done);
3225           break;
3226         }
3227 
3228         default:
3229           LOG(FATAL) << "Unexpected type conversion from " << input_type
3230                      << " to " << result_type;
3231       }
3232       break;
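    // The two fp-to-int32 cases above rely on cvttss2si/cvttsd2si producing the "integer
    // indefinite" value 0x80000000 (INT_MIN) on overflow or NaN, so only positive overflow and
    // NaN need explicit fixups to match Java narrowing semantics. A sketch of the resulting
    // behavior for float inputs:
    //
    //    1e10f  -> comiss input >= (float)INT_MAX  -> kPrimIntMax (2147483647)
    //    NaN    -> kUnordered path                 -> 0
    //   -1e10f  -> cvttss2si overflows             -> 0x80000000 (-2147483648, already correct)
    //    123.9f -> plain truncation                -> 123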
3233 
3234     case DataType::Type::kInt64:
3235       DCHECK(out.IsRegister());
3236       switch (input_type) {
3237         case DataType::Type::kBool:
3238         case DataType::Type::kUint8:
3239         case DataType::Type::kInt8:
3240         case DataType::Type::kUint16:
3241         case DataType::Type::kInt16:
3242         case DataType::Type::kInt32:
3243           DCHECK(in.IsRegister());
3244           __ movsxd(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3245           break;
3246 
3247         case DataType::Type::kFloat32: {
3248           XmmRegister input = in.AsFpuRegister<XmmRegister>();
3249           CpuRegister output = out.AsRegister<CpuRegister>();
3250           NearLabel done, nan;
3251 
3252           codegen_->Load64BitValue(output, kPrimLongMax);
3253           // if input >= (float)LONG_MAX goto done
3254           __ comiss(input, codegen_->LiteralFloatAddress(static_cast<float>(kPrimLongMax)));
3255           __ j(kAboveEqual, &done);
3256           // if input == NaN goto nan
3257           __ j(kUnordered, &nan);
3258           // output = float-to-long-truncate(input)
3259           __ cvttss2si(output, input, true);
3260           __ jmp(&done);
3261           __ Bind(&nan);
3262           //  output = 0
3263           __ xorl(output, output);
3264           __ Bind(&done);
3265           break;
3266         }
3267 
3268         case DataType::Type::kFloat64: {
3269           XmmRegister input = in.AsFpuRegister<XmmRegister>();
3270           CpuRegister output = out.AsRegister<CpuRegister>();
3271           NearLabel done, nan;
3272 
3273           codegen_->Load64BitValue(output, kPrimLongMax);
3274           // if input >= (double)LONG_MAX goto done
3275           __ comisd(input, codegen_->LiteralDoubleAddress(
3276                 static_cast<double>(kPrimLongMax)));
3277           __ j(kAboveEqual, &done);
3278           // if input == NaN goto nan
3279           __ j(kUnordered, &nan);
3280           // output = double-to-long-truncate(input)
3281           __ cvttsd2si(output, input, true);
3282           __ jmp(&done);
3283           __ Bind(&nan);
3284           //  output = 0
3285           __ xorl(output, output);
3286           __ Bind(&done);
3287           break;
3288         }
3289 
3290         default:
3291           LOG(FATAL) << "Unexpected type conversion from " << input_type
3292                      << " to " << result_type;
3293       }
3294       break;
3295 
3296     case DataType::Type::kFloat32:
3297       switch (input_type) {
3298         case DataType::Type::kBool:
3299         case DataType::Type::kUint8:
3300         case DataType::Type::kInt8:
3301         case DataType::Type::kUint16:
3302         case DataType::Type::kInt16:
3303         case DataType::Type::kInt32:
3304           if (in.IsRegister()) {
3305             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
3306           } else if (in.IsConstant()) {
3307             int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
3308             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3309             codegen_->Load32BitValue(dest, static_cast<float>(v));
3310           } else {
3311             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
3312                         Address(CpuRegister(RSP), in.GetStackIndex()), false);
3313           }
3314           break;
3315 
3316         case DataType::Type::kInt64:
3317           if (in.IsRegister()) {
3318             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
3319           } else if (in.IsConstant()) {
3320             int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
3321             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3322             codegen_->Load32BitValue(dest, static_cast<float>(v));
3323           } else {
3324             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
3325                         Address(CpuRegister(RSP), in.GetStackIndex()), true);
3326           }
3327           break;
3328 
3329         case DataType::Type::kFloat64:
3330           if (in.IsFpuRegister()) {
3331             __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3332           } else if (in.IsConstant()) {
3333             double v = in.GetConstant()->AsDoubleConstant()->GetValue();
3334             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3335             codegen_->Load32BitValue(dest, static_cast<float>(v));
3336           } else {
3337             __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(),
3338                         Address(CpuRegister(RSP), in.GetStackIndex()));
3339           }
3340           break;
3341 
3342         default:
3343           LOG(FATAL) << "Unexpected type conversion from " << input_type
3344                      << " to " << result_type;
3345       }
3346       break;
3347 
3348     case DataType::Type::kFloat64:
3349       switch (input_type) {
3350         case DataType::Type::kBool:
3351         case DataType::Type::kUint8:
3352         case DataType::Type::kInt8:
3353         case DataType::Type::kUint16:
3354         case DataType::Type::kInt16:
3355         case DataType::Type::kInt32:
3356           if (in.IsRegister()) {
3357             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
3358           } else if (in.IsConstant()) {
3359             int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
3360             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3361             codegen_->Load64BitValue(dest, static_cast<double>(v));
3362           } else {
3363             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
3364                         Address(CpuRegister(RSP), in.GetStackIndex()), false);
3365           }
3366           break;
3367 
3368         case DataType::Type::kInt64:
3369           if (in.IsRegister()) {
3370             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
3371           } else if (in.IsConstant()) {
3372             int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
3373             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3374             codegen_->Load64BitValue(dest, static_cast<double>(v));
3375           } else {
3376             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
3377                         Address(CpuRegister(RSP), in.GetStackIndex()), true);
3378           }
3379           break;
3380 
3381         case DataType::Type::kFloat32:
3382           if (in.IsFpuRegister()) {
3383             __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3384           } else if (in.IsConstant()) {
3385             float v = in.GetConstant()->AsFloatConstant()->GetValue();
3386             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3387             codegen_->Load64BitValue(dest, static_cast<double>(v));
3388           } else {
3389             __ cvtss2sd(out.AsFpuRegister<XmmRegister>(),
3390                         Address(CpuRegister(RSP), in.GetStackIndex()));
3391           }
3392           break;
3393 
3394         default:
3395           LOG(FATAL) << "Unexpected type conversion from " << input_type
3396                      << " to " << result_type;
3397       }
3398       break;
3399 
3400     default:
3401       LOG(FATAL) << "Unexpected type conversion from " << input_type
3402                  << " to " << result_type;
3403   }
3404 }
3405 
3406 void LocationsBuilderX86_64::VisitAdd(HAdd* add) {
3407   LocationSummary* locations =
3408       new (GetGraph()->GetAllocator()) LocationSummary(add, LocationSummary::kNoCall);
3409   switch (add->GetResultType()) {
3410     case DataType::Type::kInt32: {
3411       locations->SetInAt(0, Location::RequiresRegister());
3412       locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
3413       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3414       break;
3415     }
3416 
3417     case DataType::Type::kInt64: {
3418       locations->SetInAt(0, Location::RequiresRegister());
3419       // We can use a leaq or addq if the constant can fit in an immediate.
3420       locations->SetInAt(1, Location::RegisterOrInt32Constant(add->InputAt(1)));
3421       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3422       break;
3423     }
3424 
3425     case DataType::Type::kFloat64:
3426     case DataType::Type::kFloat32: {
3427       locations->SetInAt(0, Location::RequiresFpuRegister());
3428       locations->SetInAt(1, Location::Any());
3429       locations->SetOut(Location::SameAsFirstInput());
3430       break;
3431     }
3432 
3433     default:
3434       LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3435   }
3436 }
3437 
3438 void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) {
3439   LocationSummary* locations = add->GetLocations();
3440   Location first = locations->InAt(0);
3441   Location second = locations->InAt(1);
3442   Location out = locations->Out();
3443 
3444   switch (add->GetResultType()) {
3445     case DataType::Type::kInt32: {
3446       if (second.IsRegister()) {
3447         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3448           __ addl(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3449         } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3450           __ addl(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
3451         } else {
3452           __ leal(out.AsRegister<CpuRegister>(), Address(
3453               first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
3454         }
3455       } else if (second.IsConstant()) {
3456         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3457           __ addl(out.AsRegister<CpuRegister>(),
3458                   Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
3459         } else {
3460           __ leal(out.AsRegister<CpuRegister>(), Address(
3461               first.AsRegister<CpuRegister>(), second.GetConstant()->AsIntConstant()->GetValue()));
3462         }
3463       } else {
3464         DCHECK(first.Equals(locations->Out()));
3465         __ addl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
3466       }
3467       break;
3468     }
3469 
3470     case DataType::Type::kInt64: {
3471       if (second.IsRegister()) {
3472         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3473           __ addq(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3474         } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3475           __ addq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
3476         } else {
3477           __ leaq(out.AsRegister<CpuRegister>(), Address(
3478               first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
3479         }
3480       } else {
3481         DCHECK(second.IsConstant());
3482         int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3483         int32_t int32_value = Low32Bits(value);
3484         DCHECK_EQ(int32_value, value);
3485         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3486           __ addq(out.AsRegister<CpuRegister>(), Immediate(int32_value));
3487         } else {
3488           __ leaq(out.AsRegister<CpuRegister>(), Address(
3489               first.AsRegister<CpuRegister>(), int32_value));
3490         }
3491       }
3492       break;
3493     }
3494 
3495     case DataType::Type::kFloat32: {
3496       if (second.IsFpuRegister()) {
3497         __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3498       } else if (second.IsConstant()) {
3499         __ addss(first.AsFpuRegister<XmmRegister>(),
3500                  codegen_->LiteralFloatAddress(
3501                      second.GetConstant()->AsFloatConstant()->GetValue()));
3502       } else {
3503         DCHECK(second.IsStackSlot());
3504         __ addss(first.AsFpuRegister<XmmRegister>(),
3505                  Address(CpuRegister(RSP), second.GetStackIndex()));
3506       }
3507       break;
3508     }
3509 
3510     case DataType::Type::kFloat64: {
3511       if (second.IsFpuRegister()) {
3512         __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3513       } else if (second.IsConstant()) {
3514         __ addsd(first.AsFpuRegister<XmmRegister>(),
3515                  codegen_->LiteralDoubleAddress(
3516                      second.GetConstant()->AsDoubleConstant()->GetValue()));
3517       } else {
3518         DCHECK(second.IsDoubleStackSlot());
3519         __ addsd(first.AsFpuRegister<XmmRegister>(),
3520                  Address(CpuRegister(RSP), second.GetStackIndex()));
3521       }
3522       break;
3523     }
3524 
3525     default:
3526       LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3527   }
3528 }
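// The integer paths above use leal/leaq as a non-destructive three-operand add whenever the
// register allocator did not assign the output to either input, e.g. (a sketch):
//
//   out = first + second  ->  leal out, [first + second*1]
//   out = first + 16      ->  leal out, [first + 16]
//
// For int64 the constant operand was restricted to an int32 immediate in the locations builder,
// so it always fits in an addq immediate or a leaq displacement.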
3529 
3530 void LocationsBuilderX86_64::VisitSub(HSub* sub) {
3531   LocationSummary* locations =
3532       new (GetGraph()->GetAllocator()) LocationSummary(sub, LocationSummary::kNoCall);
3533   switch (sub->GetResultType()) {
3534     case DataType::Type::kInt32: {
3535       locations->SetInAt(0, Location::RequiresRegister());
3536       locations->SetInAt(1, Location::Any());
3537       locations->SetOut(Location::SameAsFirstInput());
3538       break;
3539     }
3540     case DataType::Type::kInt64: {
3541       locations->SetInAt(0, Location::RequiresRegister());
3542       locations->SetInAt(1, Location::RegisterOrInt32Constant(sub->InputAt(1)));
3543       locations->SetOut(Location::SameAsFirstInput());
3544       break;
3545     }
3546     case DataType::Type::kFloat32:
3547     case DataType::Type::kFloat64: {
3548       locations->SetInAt(0, Location::RequiresFpuRegister());
3549       locations->SetInAt(1, Location::Any());
3550       locations->SetOut(Location::SameAsFirstInput());
3551       break;
3552     }
3553     default:
3554       LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3555   }
3556 }
3557 
3558 void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) {
3559   LocationSummary* locations = sub->GetLocations();
3560   Location first = locations->InAt(0);
3561   Location second = locations->InAt(1);
3562   DCHECK(first.Equals(locations->Out()));
3563   switch (sub->GetResultType()) {
3564     case DataType::Type::kInt32: {
3565       if (second.IsRegister()) {
3566         __ subl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3567       } else if (second.IsConstant()) {
3568         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
3569         __ subl(first.AsRegister<CpuRegister>(), imm);
3570       } else {
3571         __ subl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
3572       }
3573       break;
3574     }
3575     case DataType::Type::kInt64: {
3576       if (second.IsConstant()) {
3577         int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3578         DCHECK(IsInt<32>(value));
3579         __ subq(first.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
3580       } else {
3581         __ subq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3582       }
3583       break;
3584     }
3585 
3586     case DataType::Type::kFloat32: {
3587       if (second.IsFpuRegister()) {
3588         __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3589       } else if (second.IsConstant()) {
3590         __ subss(first.AsFpuRegister<XmmRegister>(),
3591                  codegen_->LiteralFloatAddress(
3592                      second.GetConstant()->AsFloatConstant()->GetValue()));
3593       } else {
3594         DCHECK(second.IsStackSlot());
3595         __ subss(first.AsFpuRegister<XmmRegister>(),
3596                  Address(CpuRegister(RSP), second.GetStackIndex()));
3597       }
3598       break;
3599     }
3600 
3601     case DataType::Type::kFloat64: {
3602       if (second.IsFpuRegister()) {
3603         __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3604       } else if (second.IsConstant()) {
3605         __ subsd(first.AsFpuRegister<XmmRegister>(),
3606                  codegen_->LiteralDoubleAddress(
3607                      second.GetConstant()->AsDoubleConstant()->GetValue()));
3608       } else {
3609         DCHECK(second.IsDoubleStackSlot());
3610         __ subsd(first.AsFpuRegister<XmmRegister>(),
3611                  Address(CpuRegister(RSP), second.GetStackIndex()));
3612       }
3613       break;
3614     }
3615 
3616     default:
3617       LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3618   }
3619 }
3620 
3621 void LocationsBuilderX86_64::VisitMul(HMul* mul) {
3622   LocationSummary* locations =
3623       new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
3624   switch (mul->GetResultType()) {
3625     case DataType::Type::kInt32: {
3626       locations->SetInAt(0, Location::RequiresRegister());
3627       locations->SetInAt(1, Location::Any());
3628       if (mul->InputAt(1)->IsIntConstant()) {
3629         // Can use 3 operand multiply.
3630         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3631       } else {
3632         locations->SetOut(Location::SameAsFirstInput());
3633       }
3634       break;
3635     }
3636     case DataType::Type::kInt64: {
3637       locations->SetInAt(0, Location::RequiresRegister());
3638       locations->SetInAt(1, Location::Any());
3639       if (mul->InputAt(1)->IsLongConstant() &&
3640           IsInt<32>(mul->InputAt(1)->AsLongConstant()->GetValue())) {
3641         // Can use 3 operand multiply.
3642         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3643       } else {
3644         locations->SetOut(Location::SameAsFirstInput());
3645       }
3646       break;
3647     }
3648     case DataType::Type::kFloat32:
3649     case DataType::Type::kFloat64: {
3650       locations->SetInAt(0, Location::RequiresFpuRegister());
3651       locations->SetInAt(1, Location::Any());
3652       locations->SetOut(Location::SameAsFirstInput());
3653       break;
3654     }
3655 
3656     default:
3657       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3658   }
3659 }
3660 
3661 void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) {
3662   LocationSummary* locations = mul->GetLocations();
3663   Location first = locations->InAt(0);
3664   Location second = locations->InAt(1);
3665   Location out = locations->Out();
3666   switch (mul->GetResultType()) {
3667     case DataType::Type::kInt32:
3668       // The constant may have ended up in a register, so test explicitly to avoid
3669       // problems where the output may not be the same as the first operand.
3670       if (mul->InputAt(1)->IsIntConstant()) {
3671         Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
3672         __ imull(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), imm);
3673       } else if (second.IsRegister()) {
3674         DCHECK(first.Equals(out));
3675         __ imull(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3676       } else {
3677         DCHECK(first.Equals(out));
3678         DCHECK(second.IsStackSlot());
3679         __ imull(first.AsRegister<CpuRegister>(),
3680                  Address(CpuRegister(RSP), second.GetStackIndex()));
3681       }
3682       break;
3683     case DataType::Type::kInt64: {
3684       // The constant may have ended up in a register, so test explicitly to avoid
3685       // problems where the output may not be the same as the first operand.
3686       if (mul->InputAt(1)->IsLongConstant()) {
3687         int64_t value = mul->InputAt(1)->AsLongConstant()->GetValue();
3688         if (IsInt<32>(value)) {
3689           __ imulq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(),
3690                    Immediate(static_cast<int32_t>(value)));
3691         } else {
3692           // Have to use the constant area.
3693           DCHECK(first.Equals(out));
3694           __ imulq(first.AsRegister<CpuRegister>(), codegen_->LiteralInt64Address(value));
3695         }
3696       } else if (second.IsRegister()) {
3697         DCHECK(first.Equals(out));
3698         __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3699       } else {
3700         DCHECK(second.IsDoubleStackSlot());
3701         DCHECK(first.Equals(out));
3702         __ imulq(first.AsRegister<CpuRegister>(),
3703                  Address(CpuRegister(RSP), second.GetStackIndex()));
3704       }
3705       break;
3706     }
3707 
3708     case DataType::Type::kFloat32: {
3709       DCHECK(first.Equals(out));
3710       if (second.IsFpuRegister()) {
3711         __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3712       } else if (second.IsConstant()) {
3713         __ mulss(first.AsFpuRegister<XmmRegister>(),
3714                  codegen_->LiteralFloatAddress(
3715                      second.GetConstant()->AsFloatConstant()->GetValue()));
3716       } else {
3717         DCHECK(second.IsStackSlot());
3718         __ mulss(first.AsFpuRegister<XmmRegister>(),
3719                  Address(CpuRegister(RSP), second.GetStackIndex()));
3720       }
3721       break;
3722     }
3723 
3724     case DataType::Type::kFloat64: {
3725       DCHECK(first.Equals(out));
3726       if (second.IsFpuRegister()) {
3727         __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3728       } else if (second.IsConstant()) {
3729         __ mulsd(first.AsFpuRegister<XmmRegister>(),
3730                  codegen_->LiteralDoubleAddress(
3731                      second.GetConstant()->AsDoubleConstant()->GetValue()));
3732       } else {
3733         DCHECK(second.IsDoubleStackSlot());
3734         __ mulsd(first.AsFpuRegister<XmmRegister>(),
3735                  Address(CpuRegister(RSP), second.GetStackIndex()));
3736       }
3737       break;
3738     }
3739 
3740     default:
3741       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3742   }
3743 }
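
// Editor's note (illustrative, not emitted verbatim): when the second operand is a constant that
// fits in an imm32, the three-operand form of imul lets the product land in a register other than
// the first input, e.g. for an Int32 `x * 10`:
//   imull out, first, 10            // out = first * 10; first is left untouched.
// A 64-bit constant outside the int32 range cannot be encoded as an immediate, so it is read from
// the constant area and the destructive two-operand form is used instead:
//   imulq first, [RIP + literal]    // first *= 64-bit literal, hence out must alias first.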
3744 
3745 void InstructionCodeGeneratorX86_64::PushOntoFPStack(Location source, uint32_t temp_offset,
3746                                                      uint32_t stack_adjustment, bool is_float) {
3747   if (source.IsStackSlot()) {
3748     DCHECK(is_float);
3749     __ flds(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
3750   } else if (source.IsDoubleStackSlot()) {
3751     DCHECK(!is_float);
3752     __ fldl(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
3753   } else {
3754     // Write the value to the temporary location on the stack and load to FP stack.
3755     if (is_float) {
3756       Location stack_temp = Location::StackSlot(temp_offset);
3757       codegen_->Move(stack_temp, source);
3758       __ flds(Address(CpuRegister(RSP), temp_offset));
3759     } else {
3760       Location stack_temp = Location::DoubleStackSlot(temp_offset);
3761       codegen_->Move(stack_temp, source);
3762       __ fldl(Address(CpuRegister(RSP), temp_offset));
3763     }
3764   }
3765 }
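
// Editor's note (illustrative sketch): values already spilled to the stack are loaded onto the x87
// stack directly with flds/fldl. There is no direct XMM -> x87 move, so a value held in an XMM
// register takes a round trip through the temporary stack slot, conceptually (for a float):
//   movss [RSP + temp_offset], xmm_src   // codegen_->Move(stack_temp, source)
//   flds  [RSP + temp_offset]            // push it onto the FP stack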
3766 
3767 void InstructionCodeGeneratorX86_64::GenerateRemFP(HRem* rem) {
3768   DataType::Type type = rem->GetResultType();
3769   bool is_float = type == DataType::Type::kFloat32;
3770   size_t elem_size = DataType::Size(type);
3771   LocationSummary* locations = rem->GetLocations();
3772   Location first = locations->InAt(0);
3773   Location second = locations->InAt(1);
3774   Location out = locations->Out();
3775 
3776   // Create stack space for 2 elements.
3777   // TODO: enhance register allocator to ask for stack temporaries.
3778   __ subq(CpuRegister(RSP), Immediate(2 * elem_size));
3779 
3780   // Load the values to the FP stack in reverse order, using temporaries if needed.
3781   PushOntoFPStack(second, elem_size, 2 * elem_size, is_float);
3782   PushOntoFPStack(first, 0, 2 * elem_size, is_float);
3783 
3784   // Loop doing FPREM until we stabilize.
3785   NearLabel retry;
3786   __ Bind(&retry);
3787   __ fprem();
3788 
3789   // Move FP status to AX.
3790   __ fstsw();
3791 
3792   // And see if the argument reduction is complete. This is signaled by the
3793   // C2 FPU flag bit set to 0.
3794   __ andl(CpuRegister(RAX), Immediate(kC2ConditionMask));
3795   __ j(kNotEqual, &retry);
3796 
3797   // We have settled on the final value. Retrieve it into an XMM register.
3798   // Store FP top of stack to real stack.
3799   if (is_float) {
3800     __ fsts(Address(CpuRegister(RSP), 0));
3801   } else {
3802     __ fstl(Address(CpuRegister(RSP), 0));
3803   }
3804 
3805   // Pop the 2 items from the FP stack.
3806   __ fucompp();
3807 
3808   // Load the value from the stack into an XMM register.
3809   DCHECK(out.IsFpuRegister()) << out;
3810   if (is_float) {
3811     __ movss(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
3812   } else {
3813     __ movsd(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
3814   }
3815 
3816   // And remove the temporary stack space we allocated.
3817   __ addq(CpuRegister(RSP), Immediate(2 * elem_size));
3818 }
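
// Editor's note (illustrative sketch of the sequence above, float case): fprem only performs a
// partial reduction when the operand exponents are far apart, so the C2 status bit is polled in a
// loop until the remainder is final.
//   subq   RSP, 8            // 2 * sizeof(float) of scratch space
//   ...load second, then first, onto the x87 stack...
// retry:
//   fprem                    // ST(0) = partial remainder of ST(0) / ST(1)
//   fstsw                    // FPU status word -> AX
//   andl   EAX, 0x400        // kC2ConditionMask: isolate the C2 bit
//   jnz    retry             // C2 == 1 means the reduction is not complete yet
//   fsts   [RSP]             // store the settled remainder
//   fucompp                  // pop both x87 operands
//   movss  xmm_out, [RSP]    // move the result into the XMM output
//   addq   RSP, 8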
3819 
3820 void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
3821   DCHECK(instruction->IsDiv() || instruction->IsRem());
3822 
3823   LocationSummary* locations = instruction->GetLocations();
3824   Location second = locations->InAt(1);
3825   DCHECK(second.IsConstant());
3826 
3827   CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
3828   CpuRegister input_register = locations->InAt(0).AsRegister<CpuRegister>();
3829   int64_t imm = Int64FromConstant(second.GetConstant());
3830 
3831   DCHECK(imm == 1 || imm == -1);
3832 
3833   switch (instruction->GetResultType()) {
3834     case DataType::Type::kInt32: {
3835       if (instruction->IsRem()) {
3836         __ xorl(output_register, output_register);
3837       } else {
3838         __ movl(output_register, input_register);
3839         if (imm == -1) {
3840           __ negl(output_register);
3841         }
3842       }
3843       break;
3844     }
3845 
3846     case DataType::Type::kInt64: {
3847       if (instruction->IsRem()) {
3848         __ xorl(output_register, output_register);
3849       } else {
3850         __ movq(output_register, input_register);
3851         if (imm == -1) {
3852           __ negq(output_register);
3853         }
3854       }
3855       break;
3856     }
3857 
3858     default:
3859       LOG(FATAL) << "Unexpected type for div by (-)1 " << instruction->GetResultType();
3860   }
3861 }
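
// Editor's note: division by 1 or -1 never needs idiv. x / 1 == x and x / -1 == -x (a plain
// negation; negl/negq wrap INT_MIN/INT64_MIN back onto themselves, which matches Java semantics),
// while x % 1 == x % -1 == 0, which is why the remainder case simply zeroes the output register.
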
3862 void InstructionCodeGeneratorX86_64::RemByPowerOfTwo(HRem* instruction) {
3863   LocationSummary* locations = instruction->GetLocations();
3864   Location second = locations->InAt(1);
3865   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3866   CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
3867   int64_t imm = Int64FromConstant(second.GetConstant());
3868   DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
3869   uint64_t abs_imm = AbsOrMin(imm);
3870   CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
3871   if (instruction->GetResultType() == DataType::Type::kInt32) {
3872     NearLabel done;
3873     __ movl(out, numerator);
3874     __ andl(out, Immediate(abs_imm - 1));
3875     __ j(Condition::kZero, &done);
3876     __ leal(tmp, Address(out, static_cast<int32_t>(~(abs_imm - 1))));
3877     __ testl(numerator, numerator);
3878     __ cmov(Condition::kLess, out, tmp, false);
3879     __ Bind(&done);
3880 
3881   } else {
3882     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
3883     codegen_->Load64BitValue(tmp, abs_imm - 1);
3884     NearLabel done;
3885 
3886     __ movq(out, numerator);
3887     __ andq(out, tmp);
3888     __ j(Condition::kZero, &done);
3889     __ movq(tmp, numerator);
3890     __ sarq(tmp, Immediate(63));
3891     __ shlq(tmp, Immediate(WhichPowerOf2(abs_imm)));
3892     __ orq(out, tmp);
3893     __ Bind(&done);
3894   }
3895 }
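
// Editor's illustrative sketch (hypothetical helper, not referenced by the generator): the
// remainder computed above, written in plain C++ for a positive power-of-two divisor `d`. Java
// semantics give the result the sign of the numerator, e.g. -7 % 4 == -3 while 7 % 4 == 3.
[[maybe_unused]] static int32_t RemByPowerOfTwoSketch(int32_t n, int32_t d) {
  int32_t rem = n & (d - 1);  // andl/andq: the correct remainder for n >= 0.
  if (rem != 0 && n < 0) {
    rem -= d;                 // leal + cmov (32-bit) or sar/shl/or (64-bit): fix up negative n.
  }
  return rem;
}
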
3896 void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
3897   LocationSummary* locations = instruction->GetLocations();
3898   Location second = locations->InAt(1);
3899 
3900   CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
3901   CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
3902 
3903   int64_t imm = Int64FromConstant(second.GetConstant());
3904   DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
3905   uint64_t abs_imm = AbsOrMin(imm);
3906 
3907   CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
3908 
3909   if (instruction->GetResultType() == DataType::Type::kInt32) {
3910     // When the denominator is equal to 2, we can add the sign bit and the numerator into tmp.
3911     // Below we use an addl instruction instead of cmov, which gives us a 1-cycle benefit.
3912     if (abs_imm == 2) {
3913       __ leal(tmp, Address(numerator, 0));
3914       __ shrl(tmp, Immediate(31));
3915       __ addl(tmp, numerator);
3916     } else {
3917       __ leal(tmp, Address(numerator, abs_imm - 1));
3918       __ testl(numerator, numerator);
3919       __ cmov(kGreaterEqual, tmp, numerator);
3920     }
3921     int shift = CTZ(imm);
3922     __ sarl(tmp, Immediate(shift));
3923 
3924     if (imm < 0) {
3925       __ negl(tmp);
3926     }
3927 
3928     __ movl(output_register, tmp);
3929   } else {
3930     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
3931     CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
3932     if (abs_imm == 2) {
3933       __ movq(rdx, numerator);
3934       __ shrq(rdx, Immediate(63));
3935       __ addq(rdx, numerator);
3936     } else {
3937       codegen_->Load64BitValue(rdx, abs_imm - 1);
3938       __ addq(rdx, numerator);
3939       __ testq(numerator, numerator);
3940       __ cmov(kGreaterEqual, rdx, numerator);
3941     }
3942     int shift = CTZ(imm);
3943     __ sarq(rdx, Immediate(shift));
3944 
3945     if (imm < 0) {
3946       __ negq(rdx);
3947     }
3948 
3949     __ movq(output_register, rdx);
3950   }
3951 }
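
// Editor's note (illustrative sketch of the sequence emitted above for an Int32 `x / 8`, i.e. a
// positive divisor, so no final negation):
//   leal   tmp, [numerator + 7]   // bias negative numerators so the shift rounds toward zero
//   testl  numerator, numerator
//   cmovge tmp, numerator         // non-negative numerators need no bias
//   sarl   tmp, 3                 // shift by CTZ(8)
//   movl   out, tmp
// For abs_imm == 2 the bias is exactly the sign bit, so a shr-by-31 plus add replaces the cmov.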
3952 
3953 void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
3954   DCHECK(instruction->IsDiv() || instruction->IsRem());
3955 
3956   LocationSummary* locations = instruction->GetLocations();
3957   Location second = locations->InAt(1);
3958 
3959   CpuRegister numerator = instruction->IsDiv() ? locations->GetTemp(1).AsRegister<CpuRegister>()
3960       : locations->GetTemp(0).AsRegister<CpuRegister>();
3961   CpuRegister eax = locations->InAt(0).AsRegister<CpuRegister>();
3962   CpuRegister edx = instruction->IsDiv() ? locations->GetTemp(0).AsRegister<CpuRegister>()
3963       : locations->Out().AsRegister<CpuRegister>();
3964   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3965 
3966   DCHECK_EQ(RAX, eax.AsRegister());
3967   DCHECK_EQ(RDX, edx.AsRegister());
3968   if (instruction->IsDiv()) {
3969     DCHECK_EQ(RAX, out.AsRegister());
3970   } else {
3971     DCHECK_EQ(RDX, out.AsRegister());
3972   }
3973 
3974   int64_t magic;
3975   int shift;
3976 
3977   // TODO: can these branches be written as one?
3978   if (instruction->GetResultType() == DataType::Type::kInt32) {
3979     int imm = second.GetConstant()->AsIntConstant()->GetValue();
3980 
3981     CalculateMagicAndShiftForDivRem(imm, false /* is_long= */, &magic, &shift);
3982 
3983     __ movl(numerator, eax);
3984 
3985     __ movl(eax, Immediate(magic));
3986     __ imull(numerator);
3987 
3988     if (imm > 0 && magic < 0) {
3989       __ addl(edx, numerator);
3990     } else if (imm < 0 && magic > 0) {
3991       __ subl(edx, numerator);
3992     }
3993 
3994     if (shift != 0) {
3995       __ sarl(edx, Immediate(shift));
3996     }
3997 
3998     __ movl(eax, edx);
3999     __ shrl(edx, Immediate(31));
4000     __ addl(edx, eax);
4001 
4002     if (instruction->IsRem()) {
4003       __ movl(eax, numerator);
4004       __ imull(edx, Immediate(imm));
4005       __ subl(eax, edx);
4006       __ movl(edx, eax);
4007     } else {
4008       __ movl(eax, edx);
4009     }
4010   } else {
4011     int64_t imm = second.GetConstant()->AsLongConstant()->GetValue();
4012 
4013     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
4014 
4015     CpuRegister rax = eax;
4016     CpuRegister rdx = edx;
4017 
4018     CalculateMagicAndShiftForDivRem(imm, true /* is_long= */, &magic, &shift);
4019 
4020     // Save the numerator.
4021     __ movq(numerator, rax);
4022 
4023     // RAX = magic
4024     codegen_->Load64BitValue(rax, magic);
4025 
4026     // RDX:RAX = magic * numerator
4027     __ imulq(numerator);
4028 
4029     if (imm > 0 && magic < 0) {
4030       // RDX += numerator
4031       __ addq(rdx, numerator);
4032     } else if (imm < 0 && magic > 0) {
4033       // RDX -= numerator
4034       __ subq(rdx, numerator);
4035     }
4036 
4037     // Shift if needed.
4038     if (shift != 0) {
4039       __ sarq(rdx, Immediate(shift));
4040     }
4041 
4042     // RDX += 1 if RDX < 0
4043     __ movq(rax, rdx);
4044     __ shrq(rdx, Immediate(63));
4045     __ addq(rdx, rax);
4046 
4047     if (instruction->IsRem()) {
4048       __ movq(rax, numerator);
4049 
4050       if (IsInt<32>(imm)) {
4051         __ imulq(rdx, Immediate(static_cast<int32_t>(imm)));
4052       } else {
4053         __ imulq(rdx, codegen_->LiteralInt64Address(imm));
4054       }
4055 
4056       __ subq(rax, rdx);
4057       __ movq(rdx, rax);
4058     } else {
4059       __ movq(rax, rdx);
4060     }
4061   }
4062 }
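
// Editor's illustrative sketch (hypothetical helper, not part of the generator): the 32-bit
// fix-up steps above in plain C++. `magic` and `shift` are assumed to come from
// CalculateMagicAndShiftForDivRem; the widening multiply stands in for imull's EDX:EAX result.
// Signed right shifts of negative values are assumed to be arithmetic, as on the supported targets.
[[maybe_unused]] static int32_t MagicDivisionSketch(
    int32_t n, int32_t divisor, int32_t magic, int shift) {
  int32_t high = static_cast<int32_t>((static_cast<int64_t>(n) * magic) >> 32);  // EDX after imull.
  if (divisor > 0 && magic < 0) {
    high += n;  // addl edx, numerator
  } else if (divisor < 0 && magic > 0) {
    high -= n;  // subl edx, numerator
  }
  high >>= shift;  // sarl edx, shift
  // Adding the sign bit rounds the quotient toward zero, as Java requires.
  int32_t quotient = high + static_cast<int32_t>(static_cast<uint32_t>(high) >> 31);
  return quotient;  // For a remainder, the code above then computes n - quotient * divisor.
}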
4063 
4064 void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
4065   DCHECK(instruction->IsDiv() || instruction->IsRem());
4066   DataType::Type type = instruction->GetResultType();
4067   DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
4068 
4069   bool is_div = instruction->IsDiv();
4070   LocationSummary* locations = instruction->GetLocations();
4071 
4072   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4073   Location second = locations->InAt(1);
4074 
4075   DCHECK_EQ(RAX, locations->InAt(0).AsRegister<CpuRegister>().AsRegister());
4076   DCHECK_EQ(is_div ? RAX : RDX, out.AsRegister());
4077 
4078   if (second.IsConstant()) {
4079     int64_t imm = Int64FromConstant(second.GetConstant());
4080 
4081     if (imm == 0) {
4082       // Do not generate anything. DivZeroCheck would prevent any code from being executed.
4083     } else if (imm == 1 || imm == -1) {
4084       DivRemOneOrMinusOne(instruction);
4085     } else if (IsPowerOfTwo(AbsOrMin(imm))) {
4086       if (is_div) {
4087         DivByPowerOfTwo(instruction->AsDiv());
4088       } else {
4089         RemByPowerOfTwo(instruction->AsRem());
4090       }
4091     } else {
4092       DCHECK(imm <= -2 || imm >= 2);
4093       GenerateDivRemWithAnyConstant(instruction);
4094     }
4095   } else {
4096     SlowPathCode* slow_path =
4097         new (codegen_->GetScopedAllocator()) DivRemMinusOneSlowPathX86_64(
4098             instruction, out.AsRegister(), type, is_div);
4099     codegen_->AddSlowPath(slow_path);
4100 
4101     CpuRegister second_reg = second.AsRegister<CpuRegister>();
4102     // 0x80000000(00000000)/-1 triggers an arithmetic exception!
4103     // Dividing by -1 is actually a negation, and -0x80000000(00000000) = 0x80000000(00000000),
4104     // so the slow path can simply negate instead of using more complex comparisons.
4105     if (type == DataType::Type::kInt32) {
4106       __ cmpl(second_reg, Immediate(-1));
4107       __ j(kEqual, slow_path->GetEntryLabel());
4108       // edx:eax <- sign-extended of eax
4109       __ cdq();
4110       // eax = quotient, edx = remainder
4111       __ idivl(second_reg);
4112     } else {
4113       __ cmpq(second_reg, Immediate(-1));
4114       __ j(kEqual, slow_path->GetEntryLabel());
4115       // rdx:rax <- sign-extended of rax
4116       __ cqo();
4117       // rax = quotient, rdx = remainder
4118       __ idivq(second_reg);
4119     }
4120     __ Bind(slow_path->GetExitLabel());
4121   }
4122 }
4123 
4124 void LocationsBuilderX86_64::VisitDiv(HDiv* div) {
4125   LocationSummary* locations =
4126       new (GetGraph()->GetAllocator()) LocationSummary(div, LocationSummary::kNoCall);
4127   switch (div->GetResultType()) {
4128     case DataType::Type::kInt32:
4129     case DataType::Type::kInt64: {
4130       locations->SetInAt(0, Location::RegisterLocation(RAX));
4131       locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
4132       locations->SetOut(Location::SameAsFirstInput());
4133       // Intel uses edx:eax as the dividend.
4134       locations->AddTemp(Location::RegisterLocation(RDX));
4135       // We need to save the numerator while we tweak rax and rdx. As we are using imul in a way
4136       // which enforces results to be in RAX and RDX, things are simpler if we use RDX also as
4137       // output and request another temp.
4138       if (div->InputAt(1)->IsConstant()) {
4139         locations->AddTemp(Location::RequiresRegister());
4140       }
4141       break;
4142     }
4143 
4144     case DataType::Type::kFloat32:
4145     case DataType::Type::kFloat64: {
4146       locations->SetInAt(0, Location::RequiresFpuRegister());
4147       locations->SetInAt(1, Location::Any());
4148       locations->SetOut(Location::SameAsFirstInput());
4149       break;
4150     }
4151 
4152     default:
4153       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4154   }
4155 }
4156 
4157 void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) {
4158   LocationSummary* locations = div->GetLocations();
4159   Location first = locations->InAt(0);
4160   Location second = locations->InAt(1);
4161   DCHECK(first.Equals(locations->Out()));
4162 
4163   DataType::Type type = div->GetResultType();
4164   switch (type) {
4165     case DataType::Type::kInt32:
4166     case DataType::Type::kInt64: {
4167       GenerateDivRemIntegral(div);
4168       break;
4169     }
4170 
4171     case DataType::Type::kFloat32: {
4172       if (second.IsFpuRegister()) {
4173         __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4174       } else if (second.IsConstant()) {
4175         __ divss(first.AsFpuRegister<XmmRegister>(),
4176                  codegen_->LiteralFloatAddress(
4177                      second.GetConstant()->AsFloatConstant()->GetValue()));
4178       } else {
4179         DCHECK(second.IsStackSlot());
4180         __ divss(first.AsFpuRegister<XmmRegister>(),
4181                  Address(CpuRegister(RSP), second.GetStackIndex()));
4182       }
4183       break;
4184     }
4185 
4186     case DataType::Type::kFloat64: {
4187       if (second.IsFpuRegister()) {
4188         __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4189       } else if (second.IsConstant()) {
4190         __ divsd(first.AsFpuRegister<XmmRegister>(),
4191                  codegen_->LiteralDoubleAddress(
4192                      second.GetConstant()->AsDoubleConstant()->GetValue()));
4193       } else {
4194         DCHECK(second.IsDoubleStackSlot());
4195         __ divsd(first.AsFpuRegister<XmmRegister>(),
4196                  Address(CpuRegister(RSP), second.GetStackIndex()));
4197       }
4198       break;
4199     }
4200 
4201     default:
4202       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4203   }
4204 }
4205 
4206 void LocationsBuilderX86_64::VisitRem(HRem* rem) {
4207   DataType::Type type = rem->GetResultType();
4208   LocationSummary* locations =
4209     new (GetGraph()->GetAllocator()) LocationSummary(rem, LocationSummary::kNoCall);
4210 
4211   switch (type) {
4212     case DataType::Type::kInt32:
4213     case DataType::Type::kInt64: {
4214       locations->SetInAt(0, Location::RegisterLocation(RAX));
4215       locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
4216       // Intel uses rdx:rax as the dividend and puts the remainder in rdx.
4217       locations->SetOut(Location::RegisterLocation(RDX));
4218       // We need to save the numerator while we tweak eax and edx. As we are using imul in a way
4219       // which enforces results to be in RAX and RDX, things are simpler if we use EAX also as
4220       // output and request another temp.
4221       if (rem->InputAt(1)->IsConstant()) {
4222         locations->AddTemp(Location::RequiresRegister());
4223       }
4224       break;
4225     }
4226 
4227     case DataType::Type::kFloat32:
4228     case DataType::Type::kFloat64: {
4229       locations->SetInAt(0, Location::Any());
4230       locations->SetInAt(1, Location::Any());
4231       locations->SetOut(Location::RequiresFpuRegister());
4232       locations->AddTemp(Location::RegisterLocation(RAX));
4233       break;
4234     }
4235 
4236     default:
4237       LOG(FATAL) << "Unexpected rem type " << type;
4238   }
4239 }
4240 
4241 void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) {
4242   DataType::Type type = rem->GetResultType();
4243   switch (type) {
4244     case DataType::Type::kInt32:
4245     case DataType::Type::kInt64: {
4246       GenerateDivRemIntegral(rem);
4247       break;
4248     }
4249     case DataType::Type::kFloat32:
4250     case DataType::Type::kFloat64: {
4251       GenerateRemFP(rem);
4252       break;
4253     }
4254     default:
4255       LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
4256   }
4257 }
4258 
4259 static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
4260   LocationSummary* locations = new (allocator) LocationSummary(minmax);
4261   switch (minmax->GetResultType()) {
4262     case DataType::Type::kInt32:
4263     case DataType::Type::kInt64:
4264       locations->SetInAt(0, Location::RequiresRegister());
4265       locations->SetInAt(1, Location::RequiresRegister());
4266       locations->SetOut(Location::SameAsFirstInput());
4267       break;
4268     case DataType::Type::kFloat32:
4269     case DataType::Type::kFloat64:
4270       locations->SetInAt(0, Location::RequiresFpuRegister());
4271       locations->SetInAt(1, Location::RequiresFpuRegister());
4272       // The following is sub-optimal, but it is all we can do for now. It would be fine to also
4273       // accept the second input as the output (we could simply swap the inputs).
4274       locations->SetOut(Location::SameAsFirstInput());
4275       break;
4276     default:
4277       LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
4278   }
4279 }
4280 
4281 void InstructionCodeGeneratorX86_64::GenerateMinMaxInt(LocationSummary* locations,
4282                                                        bool is_min,
4283                                                        DataType::Type type) {
4284   Location op1_loc = locations->InAt(0);
4285   Location op2_loc = locations->InAt(1);
4286 
4287   // Shortcut for same input locations.
4288   if (op1_loc.Equals(op2_loc)) {
4289     // Can return immediately, as op1_loc == out_loc.
4290     // Note: if we ever support separate registers, e.g., output into memory, we need to check for
4291     //       a copy here.
4292     DCHECK(locations->Out().Equals(op1_loc));
4293     return;
4294   }
4295 
4296   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4297   CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();
4298 
4299   //  (out := op1)
4300   //  out <=? op2
4301   //  if out is min jmp done
4302   //  out := op2
4303   // done:
4304 
4305   if (type == DataType::Type::kInt64) {
4306     __ cmpq(out, op2);
4307     __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ true);
4308   } else {
4309     DCHECK_EQ(type, DataType::Type::kInt32);
4310     __ cmpl(out, op2);
4311     __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ false);
4312   }
4313 }
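
// Editor's note: the cmov keeps the sequence branch-free. For min the condition is kGreater, so
// out is replaced by op2 only when out > op2; max simply flips the condition to kLess.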
4314 
4315 void InstructionCodeGeneratorX86_64::GenerateMinMaxFP(LocationSummary* locations,
4316                                                       bool is_min,
4317                                                       DataType::Type type) {
4318   Location op1_loc = locations->InAt(0);
4319   Location op2_loc = locations->InAt(1);
4320   Location out_loc = locations->Out();
4321   XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
4322 
4323   // Shortcut for same input locations.
4324   if (op1_loc.Equals(op2_loc)) {
4325     DCHECK(out_loc.Equals(op1_loc));
4326     return;
4327   }
4328 
4329   //  (out := op1)
4330   //  out <=? op2
4331   //  if Nan jmp Nan_label
4332   //  if out is min jmp done
4333   //  if op2 is min jmp op2_label
4334   //  handle -0/+0
4335   //  jmp done
4336   // Nan_label:
4337   //  out := NaN
4338   // op2_label:
4339   //  out := op2
4340   // done:
4341   //
4342   // This removes one jmp, but needs to copy one input (op1) to out.
4343   //
4344   // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?
4345 
4346   XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
4347 
4348   NearLabel nan, done, op2_label;
4349   if (type == DataType::Type::kFloat64) {
4350     __ ucomisd(out, op2);
4351   } else {
4352     DCHECK_EQ(type, DataType::Type::kFloat32);
4353     __ ucomiss(out, op2);
4354   }
4355 
4356   __ j(Condition::kParityEven, &nan);
4357 
4358   __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
4359   __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
4360 
4361   // Handle 0.0/-0.0.
4362   if (is_min) {
4363     if (type == DataType::Type::kFloat64) {
4364       __ orpd(out, op2);
4365     } else {
4366       __ orps(out, op2);
4367     }
4368   } else {
4369     if (type == DataType::Type::kFloat64) {
4370       __ andpd(out, op2);
4371     } else {
4372       __ andps(out, op2);
4373     }
4374   }
4375   __ jmp(&done);
4376 
4377   // NaN handling.
4378   __ Bind(&nan);
4379   if (type == DataType::Type::kFloat64) {
4380     __ movsd(out, codegen_->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
4381   } else {
4382     __ movss(out, codegen_->LiteralInt32Address(INT32_C(0x7FC00000)));
4383   }
4384   __ jmp(&done);
4385 
4386   // out := op2;
4387   __ Bind(&op2_label);
4388   if (type == DataType::Type::kFloat64) {
4389     __ movsd(out, op2);
4390   } else {
4391     __ movss(out, op2);
4392   }
4393 
4394   // Done.
4395   __ Bind(&done);
4396 }
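
// Editor's note on the two special cases handled above:
//  - NaN: ucomiss/ucomisd raise PF on an unordered compare, so the parity jump loads the canonical
//    quiet NaN (0x7FC00000 / 0x7FF8000000000000), matching Math.min/max.
//  - Signed zero: -0.0 and +0.0 compare equal, so neither conditional jump is taken and the code
//    falls through to the or/and. Since only the sign bit differs, or-ing yields -0.0 for min and
//    and-ing yields +0.0 for max.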
4397 
4398 void InstructionCodeGeneratorX86_64::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
4399   DataType::Type type = minmax->GetResultType();
4400   switch (type) {
4401     case DataType::Type::kInt32:
4402     case DataType::Type::kInt64:
4403       GenerateMinMaxInt(minmax->GetLocations(), is_min, type);
4404       break;
4405     case DataType::Type::kFloat32:
4406     case DataType::Type::kFloat64:
4407       GenerateMinMaxFP(minmax->GetLocations(), is_min, type);
4408       break;
4409     default:
4410       LOG(FATAL) << "Unexpected type for HMinMax " << type;
4411   }
4412 }
4413 
4414 void LocationsBuilderX86_64::VisitMin(HMin* min) {
4415   CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
4416 }
4417 
4418 void InstructionCodeGeneratorX86_64::VisitMin(HMin* min) {
4419   GenerateMinMax(min, /*is_min*/ true);
4420 }
4421 
4422 void LocationsBuilderX86_64::VisitMax(HMax* max) {
4423   CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
4424 }
4425 
4426 void InstructionCodeGeneratorX86_64::VisitMax(HMax* max) {
4427   GenerateMinMax(max, /*is_min*/ false);
4428 }
4429 
4430 void LocationsBuilderX86_64::VisitAbs(HAbs* abs) {
4431   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
4432   switch (abs->GetResultType()) {
4433     case DataType::Type::kInt32:
4434     case DataType::Type::kInt64:
4435       locations->SetInAt(0, Location::RequiresRegister());
4436       locations->SetOut(Location::SameAsFirstInput());
4437       locations->AddTemp(Location::RequiresRegister());
4438       break;
4439     case DataType::Type::kFloat32:
4440     case DataType::Type::kFloat64:
4441       locations->SetInAt(0, Location::RequiresFpuRegister());
4442       locations->SetOut(Location::SameAsFirstInput());
4443       locations->AddTemp(Location::RequiresFpuRegister());
4444       break;
4445     default:
4446       LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4447   }
4448 }
4449 
4450 void InstructionCodeGeneratorX86_64::VisitAbs(HAbs* abs) {
4451   LocationSummary* locations = abs->GetLocations();
4452   switch (abs->GetResultType()) {
4453     case DataType::Type::kInt32: {
4454       CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4455       CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();
4456       // Create mask.
4457       __ movl(mask, out);
4458       __ sarl(mask, Immediate(31));
4459       // Add mask.
4460       __ addl(out, mask);
4461       __ xorl(out, mask);
4462       break;
4463     }
4464     case DataType::Type::kInt64: {
4465       CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4466       CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();
4467       // Create mask.
4468       __ movq(mask, out);
4469       __ sarq(mask, Immediate(63));
4470       // Add mask.
4471       __ addq(out, mask);
4472       __ xorq(out, mask);
4473       break;
4474     }
4475     case DataType::Type::kFloat32: {
4476       XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4477       XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4478       __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
4479       __ andps(out, mask);
4480       break;
4481     }
4482     case DataType::Type::kFloat64: {
4483       XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4484       XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4485       __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
4486       __ andpd(out, mask);
4487       break;
4488     }
4489     default:
4490       LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4491   }
4492 }
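
// Editor's illustrative sketch (hypothetical helper, not part of the generator): the branch-free
// integer abs emitted above. The mask is 0 for non-negative inputs and all ones for negative
// ones, so (x + mask) ^ mask either leaves x alone or negates it; INT_MIN maps to itself, which
// is what Math.abs requires. The floating-point cases simply clear the sign bit with an and mask.
[[maybe_unused]] static int32_t AbsSketch(int32_t x) {
  int32_t mask = x >> 31;  // sarl: arithmetic shift replicates the sign bit (0 or -1).
  uint32_t biased = static_cast<uint32_t>(x) + static_cast<uint32_t>(mask);  // addl, wrapping.
  return static_cast<int32_t>(biased ^ static_cast<uint32_t>(mask));         // xorl.
}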
4493 
4494 void LocationsBuilderX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4495   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
4496   locations->SetInAt(0, Location::Any());
4497 }
4498 
4499 void InstructionCodeGeneratorX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4500   SlowPathCode* slow_path =
4501       new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathX86_64(instruction);
4502   codegen_->AddSlowPath(slow_path);
4503 
4504   LocationSummary* locations = instruction->GetLocations();
4505   Location value = locations->InAt(0);
4506 
4507   switch (instruction->GetType()) {
4508     case DataType::Type::kBool:
4509     case DataType::Type::kUint8:
4510     case DataType::Type::kInt8:
4511     case DataType::Type::kUint16:
4512     case DataType::Type::kInt16:
4513     case DataType::Type::kInt32: {
4514       if (value.IsRegister()) {
4515         __ testl(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
4516         __ j(kEqual, slow_path->GetEntryLabel());
4517       } else if (value.IsStackSlot()) {
4518         __ cmpl(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
4519         __ j(kEqual, slow_path->GetEntryLabel());
4520       } else {
4521         DCHECK(value.IsConstant()) << value;
4522         if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
4523           __ jmp(slow_path->GetEntryLabel());
4524         }
4525       }
4526       break;
4527     }
4528     case DataType::Type::kInt64: {
4529       if (value.IsRegister()) {
4530         __ testq(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
4531         __ j(kEqual, slow_path->GetEntryLabel());
4532       } else if (value.IsDoubleStackSlot()) {
4533         __ cmpq(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
4534         __ j(kEqual, slow_path->GetEntryLabel());
4535       } else {
4536         DCHECK(value.IsConstant()) << value;
4537         if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
4538           __ jmp(slow_path->GetEntryLabel());
4539         }
4540       }
4541       break;
4542     }
4543     default:
4544       LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
4545   }
4546 }
4547 
4548 void LocationsBuilderX86_64::HandleShift(HBinaryOperation* op) {
4549   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4550 
4551   LocationSummary* locations =
4552       new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall);
4553 
4554   switch (op->GetResultType()) {
4555     case DataType::Type::kInt32:
4556     case DataType::Type::kInt64: {
4557       locations->SetInAt(0, Location::RequiresRegister());
4558       // The shift count needs to be in CL.
4559       locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, op->InputAt(1)));
4560       locations->SetOut(Location::SameAsFirstInput());
4561       break;
4562     }
4563     default:
4564       LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
4565   }
4566 }
4567 
4568 void InstructionCodeGeneratorX86_64::HandleShift(HBinaryOperation* op) {
4569   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4570 
4571   LocationSummary* locations = op->GetLocations();
4572   CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
4573   Location second = locations->InAt(1);
4574 
4575   switch (op->GetResultType()) {
4576     case DataType::Type::kInt32: {
4577       if (second.IsRegister()) {
4578         CpuRegister second_reg = second.AsRegister<CpuRegister>();
4579         if (op->IsShl()) {
4580           __ shll(first_reg, second_reg);
4581         } else if (op->IsShr()) {
4582           __ sarl(first_reg, second_reg);
4583         } else {
4584           __ shrl(first_reg, second_reg);
4585         }
4586       } else {
4587         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
4588         if (op->IsShl()) {
4589           __ shll(first_reg, imm);
4590         } else if (op->IsShr()) {
4591           __ sarl(first_reg, imm);
4592         } else {
4593           __ shrl(first_reg, imm);
4594         }
4595       }
4596       break;
4597     }
4598     case DataType::Type::kInt64: {
4599       if (second.IsRegister()) {
4600         CpuRegister second_reg = second.AsRegister<CpuRegister>();
4601         if (op->IsShl()) {
4602           __ shlq(first_reg, second_reg);
4603         } else if (op->IsShr()) {
4604           __ sarq(first_reg, second_reg);
4605         } else {
4606           __ shrq(first_reg, second_reg);
4607         }
4608       } else {
4609         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
4610         if (op->IsShl()) {
4611           __ shlq(first_reg, imm);
4612         } else if (op->IsShr()) {
4613           __ sarq(first_reg, imm);
4614         } else {
4615           __ shrq(first_reg, imm);
4616         }
4617       }
4618       break;
4619     }
4620     default:
4621       LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
4622       UNREACHABLE();
4623   }
4624 }
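
// Editor's note: non-constant shift counts must live in CL because the variable-count forms of
// shl/sar/shr only accept CL. Constant counts are masked with kMaxIntShiftDistance (31) or
// kMaxLongShiftDistance (63), matching both the hardware behaviour and the Java rule that only
// the low 5 (or 6) bits of the count are used, e.g. (x << 33) == (x << 1) for an int x.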
4625 
4626 void LocationsBuilderX86_64::VisitRor(HRor* ror) {
4627   LocationSummary* locations =
4628       new (GetGraph()->GetAllocator()) LocationSummary(ror, LocationSummary::kNoCall);
4629 
4630   switch (ror->GetResultType()) {
4631     case DataType::Type::kInt32:
4632     case DataType::Type::kInt64: {
4633       locations->SetInAt(0, Location::RequiresRegister());
4634       // The shift count needs to be in CL (unless it is a constant).
4635       locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, ror->InputAt(1)));
4636       locations->SetOut(Location::SameAsFirstInput());
4637       break;
4638     }
4639     default:
4640       LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
4641       UNREACHABLE();
4642   }
4643 }
4644 
4645 void InstructionCodeGeneratorX86_64::VisitRor(HRor* ror) {
4646   LocationSummary* locations = ror->GetLocations();
4647   CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
4648   Location second = locations->InAt(1);
4649 
4650   switch (ror->GetResultType()) {
4651     case DataType::Type::kInt32:
4652       if (second.IsRegister()) {
4653         CpuRegister second_reg = second.AsRegister<CpuRegister>();
4654         __ rorl(first_reg, second_reg);
4655       } else {
4656         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
4657         __ rorl(first_reg, imm);
4658       }
4659       break;
4660     case DataType::Type::kInt64:
4661       if (second.IsRegister()) {
4662         CpuRegister second_reg = second.AsRegister<CpuRegister>();
4663         __ rorq(first_reg, second_reg);
4664       } else {
4665         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
4666         __ rorq(first_reg, imm);
4667       }
4668       break;
4669     default:
4670       LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
4671       UNREACHABLE();
4672   }
4673 }
4674 
4675 void LocationsBuilderX86_64::VisitShl(HShl* shl) {
4676   HandleShift(shl);
4677 }
4678 
4679 void InstructionCodeGeneratorX86_64::VisitShl(HShl* shl) {
4680   HandleShift(shl);
4681 }
4682 
4683 void LocationsBuilderX86_64::VisitShr(HShr* shr) {
4684   HandleShift(shr);
4685 }
4686 
4687 void InstructionCodeGeneratorX86_64::VisitShr(HShr* shr) {
4688   HandleShift(shr);
4689 }
4690 
4691 void LocationsBuilderX86_64::VisitUShr(HUShr* ushr) {
4692   HandleShift(ushr);
4693 }
4694 
4695 void InstructionCodeGeneratorX86_64::VisitUShr(HUShr* ushr) {
4696   HandleShift(ushr);
4697 }
4698 
4699 void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
4700   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
4701       instruction, LocationSummary::kCallOnMainOnly);
4702   InvokeRuntimeCallingConvention calling_convention;
4703   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
4704   locations->SetOut(Location::RegisterLocation(RAX));
4705 }
4706 
4707 void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) {
4708   codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
4709   CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
4710   DCHECK(!codegen_->IsLeafMethod());
4711 }
4712 
4713 void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) {
4714   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
4715       instruction, LocationSummary::kCallOnMainOnly);
4716   InvokeRuntimeCallingConvention calling_convention;
4717   locations->SetOut(Location::RegisterLocation(RAX));
4718   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
4719   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
4720 }
4721 
4722 void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) {
4723   // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
4724   QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
4725   codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
4726   CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
4727   DCHECK(!codegen_->IsLeafMethod());
4728 }
4729 
4730 void LocationsBuilderX86_64::VisitParameterValue(HParameterValue* instruction) {
4731   LocationSummary* locations =
4732       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4733   Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
4734   if (location.IsStackSlot()) {
4735     location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
4736   } else if (location.IsDoubleStackSlot()) {
4737     location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
4738   }
4739   locations->SetOut(location);
4740 }
4741 
4742 void InstructionCodeGeneratorX86_64::VisitParameterValue(
4743     HParameterValue* instruction ATTRIBUTE_UNUSED) {
4744   // Nothing to do, the parameter is already at its location.
4745 }
4746 
4747 void LocationsBuilderX86_64::VisitCurrentMethod(HCurrentMethod* instruction) {
4748   LocationSummary* locations =
4749       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4750   locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument));
4751 }
4752 
4753 void InstructionCodeGeneratorX86_64::VisitCurrentMethod(
4754     HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
4755   // Nothing to do, the method is already at its location.
4756 }
4757 
4758 void LocationsBuilderX86_64::VisitClassTableGet(HClassTableGet* instruction) {
4759   LocationSummary* locations =
4760       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4761   locations->SetInAt(0, Location::RequiresRegister());
4762   locations->SetOut(Location::RequiresRegister());
4763 }
4764 
4765 void InstructionCodeGeneratorX86_64::VisitClassTableGet(HClassTableGet* instruction) {
4766   LocationSummary* locations = instruction->GetLocations();
4767   if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
4768     uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
4769         instruction->GetIndex(), kX86_64PointerSize).SizeValue();
4770     __ movq(locations->Out().AsRegister<CpuRegister>(),
4771             Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset));
4772   } else {
4773     uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
4774         instruction->GetIndex(), kX86_64PointerSize));
4775     __ movq(locations->Out().AsRegister<CpuRegister>(),
4776             Address(locations->InAt(0).AsRegister<CpuRegister>(),
4777             mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
4778     __ movq(locations->Out().AsRegister<CpuRegister>(),
4779             Address(locations->Out().AsRegister<CpuRegister>(), method_offset));
4780   }
4781 }
4782 
4783 void LocationsBuilderX86_64::VisitNot(HNot* not_) {
4784   LocationSummary* locations =
4785       new (GetGraph()->GetAllocator()) LocationSummary(not_, LocationSummary::kNoCall);
4786   locations->SetInAt(0, Location::RequiresRegister());
4787   locations->SetOut(Location::SameAsFirstInput());
4788 }
4789 
4790 void InstructionCodeGeneratorX86_64::VisitNot(HNot* not_) {
4791   LocationSummary* locations = not_->GetLocations();
4792   DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
4793             locations->Out().AsRegister<CpuRegister>().AsRegister());
4794   Location out = locations->Out();
4795   switch (not_->GetResultType()) {
4796     case DataType::Type::kInt32:
4797       __ notl(out.AsRegister<CpuRegister>());
4798       break;
4799 
4800     case DataType::Type::kInt64:
4801       __ notq(out.AsRegister<CpuRegister>());
4802       break;
4803 
4804     default:
4805       LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
4806   }
4807 }
4808 
4809 void LocationsBuilderX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
4810   LocationSummary* locations =
4811       new (GetGraph()->GetAllocator()) LocationSummary(bool_not, LocationSummary::kNoCall);
4812   locations->SetInAt(0, Location::RequiresRegister());
4813   locations->SetOut(Location::SameAsFirstInput());
4814 }
4815 
4816 void InstructionCodeGeneratorX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
4817   LocationSummary* locations = bool_not->GetLocations();
4818   DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
4819             locations->Out().AsRegister<CpuRegister>().AsRegister());
4820   Location out = locations->Out();
4821   __ xorl(out.AsRegister<CpuRegister>(), Immediate(1));
4822 }
4823 
4824 void LocationsBuilderX86_64::VisitPhi(HPhi* instruction) {
4825   LocationSummary* locations =
4826       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4827   for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
4828     locations->SetInAt(i, Location::Any());
4829   }
4830   locations->SetOut(Location::Any());
4831 }
4832 
4833 void InstructionCodeGeneratorX86_64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
4834   LOG(FATAL) << "Unimplemented";
4835 }
4836 
4837 void CodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) {
4838   /*
4839    * According to the JSR-133 Cookbook, for x86-64 only StoreLoad/AnyAny barriers need memory fence.
4840    * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86-64 memory model.
4841    * For those cases, all we need to ensure is that there is a scheduling barrier in place.
4842    */
4843   switch (kind) {
4844     case MemBarrierKind::kAnyAny: {
4845       MemoryFence();
4846       break;
4847     }
4848     case MemBarrierKind::kAnyStore:
4849     case MemBarrierKind::kLoadAny:
4850     case MemBarrierKind::kStoreStore: {
4851       // nop
4852       break;
4853     }
4854     case MemBarrierKind::kNTStoreStore:
4855       // Non-Temporal Store/Store needs an explicit fence.
4856       MemoryFence(/* non-temporal= */ true);
4857       break;
4858   }
4859 }
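
// Editor's note: x86-64's TSO memory model already orders load/load, load/store and store/store,
// so only the store->load (kAnyAny) case needs a real fence from MemoryFence(); the other kinds
// merely require that the compiler does not reorder the accesses. Non-temporal stores are weakly
// ordered and bypass TSO, hence the explicit fence for kNTStoreStore.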
4860 
4861 void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) {
4862   DCHECK(instruction->IsInstanceFieldGet() ||
4863          instruction->IsStaticFieldGet() ||
4864          instruction->IsPredicatedInstanceFieldGet());
4865 
4866   bool object_field_get_with_read_barrier =
4867       kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
4868   bool is_predicated = instruction->IsPredicatedInstanceFieldGet();
4869   LocationSummary* locations =
4870       new (GetGraph()->GetAllocator()) LocationSummary(instruction,
4871                                                        object_field_get_with_read_barrier
4872                                                            ? LocationSummary::kCallOnSlowPath
4873                                                            : LocationSummary::kNoCall);
4874   if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
4875     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
4876   }
4877   // receiver_input
4878   locations->SetInAt(is_predicated ? 1 : 0, Location::RequiresRegister());
4879   if (is_predicated) {
4880     if (DataType::IsFloatingPointType(instruction->GetType())) {
4881       locations->SetInAt(0, Location::RequiresFpuRegister());
4882     } else {
4883       locations->SetInAt(0, Location::RequiresRegister());
4884     }
4885   }
4886   if (DataType::IsFloatingPointType(instruction->GetType())) {
4887     locations->SetOut(is_predicated ? Location::SameAsFirstInput()
4888                                     : Location::RequiresFpuRegister());
4889   } else {
4890     // The output overlaps for an object field get when read barriers are
4891     // enabled: we do not want the move to overwrite the object's location, as
4892     // we need it to emit the read barrier. For predicated instructions we can always overlap,
4893     // since the output is SameAsFirstInput and the first input already holds the default value.
4894     locations->SetOut(is_predicated ? Location::SameAsFirstInput() : Location::RequiresRegister(),
4895                       object_field_get_with_read_barrier || is_predicated
4896                           ? Location::kOutputOverlap
4897                           : Location::kNoOutputOverlap);
4898   }
4899 }
4900 
4901 void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
4902                                                     const FieldInfo& field_info) {
4903   DCHECK(instruction->IsInstanceFieldGet() ||
4904          instruction->IsStaticFieldGet() ||
4905          instruction->IsPredicatedInstanceFieldGet());
4906 
4907   LocationSummary* locations = instruction->GetLocations();
4908   Location base_loc = locations->InAt(instruction->IsPredicatedInstanceFieldGet() ? 1 : 0);
4909   CpuRegister base = base_loc.AsRegister<CpuRegister>();
4910   Location out = locations->Out();
4911   bool is_volatile = field_info.IsVolatile();
4912   DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
4913   DataType::Type load_type = instruction->GetType();
4914   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
4915 
4916   switch (load_type) {
4917     case DataType::Type::kBool:
4918     case DataType::Type::kUint8: {
4919       __ movzxb(out.AsRegister<CpuRegister>(), Address(base, offset));
4920       break;
4921     }
4922 
4923     case DataType::Type::kInt8: {
4924       __ movsxb(out.AsRegister<CpuRegister>(), Address(base, offset));
4925       break;
4926     }
4927 
4928     case DataType::Type::kUint16: {
4929       __ movzxw(out.AsRegister<CpuRegister>(), Address(base, offset));
4930       break;
4931     }
4932 
4933     case DataType::Type::kInt16: {
4934       __ movsxw(out.AsRegister<CpuRegister>(), Address(base, offset));
4935       break;
4936     }
4937 
4938     case DataType::Type::kInt32: {
4939       __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
4940       break;
4941     }
4942 
4943     case DataType::Type::kReference: {
4944       // /* HeapReference<Object> */ out = *(base + offset)
4945       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
4946         // Note that a potential implicit null check is handled in this
4947         // CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call.
4948         codegen_->GenerateFieldLoadWithBakerReadBarrier(
4949             instruction, out, base, offset, /* needs_null_check= */ true);
4950         if (is_volatile) {
4951           codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4952         }
4953       } else {
4954         __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
4955         codegen_->MaybeRecordImplicitNullCheck(instruction);
4956         if (is_volatile) {
4957           codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4958         }
4959         // If read barriers are enabled, emit read barriers other than
4960         // Baker's using a slow path (and also unpoison the loaded
4961         // reference, if heap poisoning is enabled).
4962         codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
4963       }
4964       break;
4965     }
4966 
4967     case DataType::Type::kInt64: {
4968       __ movq(out.AsRegister<CpuRegister>(), Address(base, offset));
4969       break;
4970     }
4971 
4972     case DataType::Type::kFloat32: {
4973       __ movss(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
4974       break;
4975     }
4976 
4977     case DataType::Type::kFloat64: {
4978       __ movsd(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
4979       break;
4980     }
4981 
4982     case DataType::Type::kUint32:
4983     case DataType::Type::kUint64:
4984     case DataType::Type::kVoid:
4985       LOG(FATAL) << "Unreachable type " << load_type;
4986       UNREACHABLE();
4987   }
4988 
4989   if (load_type == DataType::Type::kReference) {
4990     // Potential implicit null checks, in the case of reference
4991     // fields, are handled in the previous switch statement.
4992   } else {
4993     codegen_->MaybeRecordImplicitNullCheck(instruction);
4994   }
4995 
4996   if (is_volatile) {
4997     if (load_type == DataType::Type::kReference) {
4998       // Memory barriers, in the case of references, are also handled
4999       // in the previous switch statement.
5000     } else {
5001       codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5002     }
5003   }
5004 }
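
// Editor's note: for a volatile field the load itself is a single instruction, and since x86-64
// never reorders a load with a later load or store, the trailing kLoadAny barrier emits no code;
// it only constrains compile-time scheduling. Volatile reference loads get the same barrier on
// both the Baker read barrier path and the plain path above.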
5005 
5006 void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction,
5007                                             const FieldInfo& field_info) {
5008   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
5009 
5010   LocationSummary* locations =
5011       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5012   DataType::Type field_type = field_info.GetFieldType();
5013   bool is_volatile = field_info.IsVolatile();
5014   bool needs_write_barrier =
5015       CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
5016 
5017   locations->SetInAt(0, Location::RequiresRegister());
5018   if (DataType::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
5019     if (is_volatile) {
5020       // In order to satisfy the semantics of volatile, this must be a single instruction store.
5021       locations->SetInAt(1, Location::FpuRegisterOrInt32Constant(instruction->InputAt(1)));
5022     } else {
5023       locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
5024     }
5025   } else {
5026     if (is_volatile) {
5027       // In order to satisfy the semantics of volatile, this must be a single instruction store.
5028       locations->SetInAt(1, Location::RegisterOrInt32Constant(instruction->InputAt(1)));
5029     } else {
5030       locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5031     }
5032   }
5033   if (needs_write_barrier) {
5034     // Temporary registers for the write barrier.
5035     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
5036     locations->AddTemp(Location::RequiresRegister());
5037   } else if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
5038     // Temporary register for the reference poisoning.
5039     locations->AddTemp(Location::RequiresRegister());
5040   }
5041 }
5042 
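// Code generation for field stores. For a volatile field, an AnyStore barrier is
// emitted before the store and an AnyAny barrier after it; under x86-64's TSO memory
// model the leading barrier is expected to expand to nothing, while the trailing
// AnyAny barrier supplies the StoreLoad ordering (see GenerateMemoryBarrier).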
5043 void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
5044                                                     const FieldInfo& field_info,
5045                                                     bool value_can_be_null) {
5046   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
5047 
5048   LocationSummary* locations = instruction->GetLocations();
5049   CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>();
5050   Location value = locations->InAt(1);
5051   bool is_volatile = field_info.IsVolatile();
5052   DataType::Type field_type = field_info.GetFieldType();
5053   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
5054   bool is_predicated =
5055       instruction->IsInstanceFieldSet() && instruction->AsInstanceFieldSet()->GetIsPredicatedSet();
5056 
5057   if (is_volatile) {
5058     codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
5059   }
5060 
5061   bool maybe_record_implicit_null_check_done = false;
5062 
5063   NearLabel pred_is_null;
5064   if (is_predicated) {
5065     __ testl(base, base);
5066     __ j(kZero, &pred_is_null);
5067   }
5068 
5069   switch (field_type) {
5070     case DataType::Type::kBool:
5071     case DataType::Type::kUint8:
5072     case DataType::Type::kInt8: {
5073       if (value.IsConstant()) {
5074         __ movb(Address(base, offset),
5075                 Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
5076       } else {
5077         __ movb(Address(base, offset), value.AsRegister<CpuRegister>());
5078       }
5079       break;
5080     }
5081 
5082     case DataType::Type::kUint16:
5083     case DataType::Type::kInt16: {
5084       if (value.IsConstant()) {
5085         __ movw(Address(base, offset),
5086                 Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
5087       } else {
5088         __ movw(Address(base, offset), value.AsRegister<CpuRegister>());
5089       }
5090       break;
5091     }
5092 
5093     case DataType::Type::kInt32:
5094     case DataType::Type::kReference: {
5095       if (value.IsConstant()) {
5096         int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
5097         // `field_type == DataType::Type::kReference` implies `v == 0`.
5098         DCHECK((field_type != DataType::Type::kReference) || (v == 0));
5099         // Note: if heap poisoning is enabled, no need to poison
5100         // (negate) `v` if it is a reference, as it would be null.
5101         __ movl(Address(base, offset), Immediate(v));
5102       } else {
5103         if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
5104           CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
5105           __ movl(temp, value.AsRegister<CpuRegister>());
5106           __ PoisonHeapReference(temp);
5107           __ movl(Address(base, offset), temp);
5108         } else {
5109           __ movl(Address(base, offset), value.AsRegister<CpuRegister>());
5110         }
5111       }
5112       break;
5113     }
5114 
5115     case DataType::Type::kInt64: {
5116       if (value.IsConstant()) {
5117         int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
5118         codegen_->MoveInt64ToAddress(Address(base, offset),
5119                                      Address(base, offset + sizeof(int32_t)),
5120                                      v,
5121                                      instruction);
5122         maybe_record_implicit_null_check_done = true;
5123       } else {
5124         __ movq(Address(base, offset), value.AsRegister<CpuRegister>());
5125       }
5126       break;
5127     }
5128 
5129     case DataType::Type::kFloat32: {
5130       if (value.IsConstant()) {
5131         int32_t v =
5132             bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
5133         __ movl(Address(base, offset), Immediate(v));
5134       } else {
5135         __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>());
5136       }
5137       break;
5138     }
5139 
5140     case DataType::Type::kFloat64: {
5141       if (value.IsConstant()) {
5142         int64_t v =
5143             bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
5144         codegen_->MoveInt64ToAddress(Address(base, offset),
5145                                      Address(base, offset + sizeof(int32_t)),
5146                                      v,
5147                                      instruction);
5148         maybe_record_implicit_null_check_done = true;
5149       } else {
5150         __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>());
5151       }
5152       break;
5153     }
5154 
5155     case DataType::Type::kUint32:
5156     case DataType::Type::kUint64:
5157     case DataType::Type::kVoid:
5158       LOG(FATAL) << "Unreachable type " << field_type;
5159       UNREACHABLE();
5160   }
5161 
5162   if (!maybe_record_implicit_null_check_done) {
5163     codegen_->MaybeRecordImplicitNullCheck(instruction);
5164   }
5165 
5166   if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
5167     CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
5168     CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
5169     codegen_->MarkGCCard(temp, card, base, value.AsRegister<CpuRegister>(), value_can_be_null);
5170   }
5171 
5172   if (is_volatile) {
5173     codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
5174   }
5175 
5176   if (is_predicated) {
5177     __ Bind(&pred_is_null);
5178   }
5179 }
5180 
5181 void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
5182   HandleFieldSet(instruction, instruction->GetFieldInfo());
5183 }
5184 
5185 void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
5186   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
5187 }
5188 
5189 void LocationsBuilderX86_64::VisitPredicatedInstanceFieldGet(
5190     HPredicatedInstanceFieldGet* instruction) {
5191   HandleFieldGet(instruction);
5192 }
5193 
5194 void LocationsBuilderX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
5195   HandleFieldGet(instruction);
5196 }
5197 
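// Predicated get: input 1 is the target object. The locations builder is assumed to
// alias the output with input 0 (the default value), so when the target is null the
// load is skipped and the default value is simply left in the output location.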
5198 void InstructionCodeGeneratorX86_64::VisitPredicatedInstanceFieldGet(
5199     HPredicatedInstanceFieldGet* instruction) {
5200   NearLabel finish;
5201   LocationSummary* locations = instruction->GetLocations();
5202   CpuRegister target = locations->InAt(1).AsRegister<CpuRegister>();
5203   __ testl(target, target);
5204   __ j(kZero, &finish);
5205   HandleFieldGet(instruction, instruction->GetFieldInfo());
5206   __ Bind(&finish);
5207 }
5208 
5209 void InstructionCodeGeneratorX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
5210   HandleFieldGet(instruction, instruction->GetFieldInfo());
5211 }
5212 
5213 void LocationsBuilderX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5214   HandleFieldGet(instruction);
5215 }
5216 
5217 void InstructionCodeGeneratorX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5218   HandleFieldGet(instruction, instruction->GetFieldInfo());
5219 }
5220 
5221 void LocationsBuilderX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5222   HandleFieldSet(instruction, instruction->GetFieldInfo());
5223 }
5224 
5225 void InstructionCodeGeneratorX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5226   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
5227 }
5228 
5229 void LocationsBuilderX86_64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
5230   codegen_->CreateStringBuilderAppendLocations(instruction, Location::RegisterLocation(RAX));
5231 }
5232 
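// The packed format value describing the arguments to append is passed in RDI, here
// assumed to be the first argument register of the runtime calling convention; the
// resulting String is returned in RAX, the output location chosen in the builder above.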
5233 void InstructionCodeGeneratorX86_64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
5234   __ movl(CpuRegister(RDI), Immediate(instruction->GetFormat()->GetValue()));
5235   codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc());
5236 }
5237 
5238 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldGet(
5239     HUnresolvedInstanceFieldGet* instruction) {
5240   FieldAccessCallingConventionX86_64 calling_convention;
5241   codegen_->CreateUnresolvedFieldLocationSummary(
5242       instruction, instruction->GetFieldType(), calling_convention);
5243 }
5244 
5245 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldGet(
5246     HUnresolvedInstanceFieldGet* instruction) {
5247   FieldAccessCallingConventionX86_64 calling_convention;
5248   codegen_->GenerateUnresolvedFieldAccess(instruction,
5249                                           instruction->GetFieldType(),
5250                                           instruction->GetFieldIndex(),
5251                                           instruction->GetDexPc(),
5252                                           calling_convention);
5253 }
5254 
5255 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldSet(
5256     HUnresolvedInstanceFieldSet* instruction) {
5257   FieldAccessCallingConventionX86_64 calling_convention;
5258   codegen_->CreateUnresolvedFieldLocationSummary(
5259       instruction, instruction->GetFieldType(), calling_convention);
5260 }
5261 
5262 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldSet(
5263     HUnresolvedInstanceFieldSet* instruction) {
5264   FieldAccessCallingConventionX86_64 calling_convention;
5265   codegen_->GenerateUnresolvedFieldAccess(instruction,
5266                                           instruction->GetFieldType(),
5267                                           instruction->GetFieldIndex(),
5268                                           instruction->GetDexPc(),
5269                                           calling_convention);
5270 }
5271 
5272 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldGet(
5273     HUnresolvedStaticFieldGet* instruction) {
5274   FieldAccessCallingConventionX86_64 calling_convention;
5275   codegen_->CreateUnresolvedFieldLocationSummary(
5276       instruction, instruction->GetFieldType(), calling_convention);
5277 }
5278 
5279 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldGet(
5280     HUnresolvedStaticFieldGet* instruction) {
5281   FieldAccessCallingConventionX86_64 calling_convention;
5282   codegen_->GenerateUnresolvedFieldAccess(instruction,
5283                                           instruction->GetFieldType(),
5284                                           instruction->GetFieldIndex(),
5285                                           instruction->GetDexPc(),
5286                                           calling_convention);
5287 }
5288 
5289 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldSet(
5290     HUnresolvedStaticFieldSet* instruction) {
5291   FieldAccessCallingConventionX86_64 calling_convention;
5292   codegen_->CreateUnresolvedFieldLocationSummary(
5293       instruction, instruction->GetFieldType(), calling_convention);
5294 }
5295 
5296 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldSet(
5297     HUnresolvedStaticFieldSet* instruction) {
5298   FieldAccessCallingConventionX86_64 calling_convention;
5299   codegen_->GenerateUnresolvedFieldAccess(instruction,
5300                                           instruction->GetFieldType(),
5301                                           instruction->GetFieldIndex(),
5302                                           instruction->GetDexPc(),
5303                                           calling_convention);
5304 }
5305 
5306 void LocationsBuilderX86_64::VisitNullCheck(HNullCheck* instruction) {
5307   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
5308   Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks()
5309       ? Location::RequiresRegister()
5310       : Location::Any();
5311   locations->SetInAt(0, loc);
5312 }
5313 
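// An implicit null check is a dummy load from the object: `testl RAX, [obj + 0]` reads
// the first word of the object without modifying RAX. If `obj` is null the load faults,
// and the fault handler is expected to use the stack map recorded by RecordPcInfo below
// to raise the NullPointerException at the right dex pc.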
5314 void CodeGeneratorX86_64::GenerateImplicitNullCheck(HNullCheck* instruction) {
5315   if (CanMoveNullCheckToUser(instruction)) {
5316     return;
5317   }
5318   LocationSummary* locations = instruction->GetLocations();
5319   Location obj = locations->InAt(0);
5320 
5321   __ testl(CpuRegister(RAX), Address(obj.AsRegister<CpuRegister>(), 0));
5322   RecordPcInfo(instruction, instruction->GetDexPc());
5323 }
5324 
5325 void CodeGeneratorX86_64::GenerateExplicitNullCheck(HNullCheck* instruction) {
5326   SlowPathCode* slow_path = new (GetScopedAllocator()) NullCheckSlowPathX86_64(instruction);
5327   AddSlowPath(slow_path);
5328 
5329   LocationSummary* locations = instruction->GetLocations();
5330   Location obj = locations->InAt(0);
5331 
5332   if (obj.IsRegister()) {
5333     __ testl(obj.AsRegister<CpuRegister>(), obj.AsRegister<CpuRegister>());
5334   } else if (obj.IsStackSlot()) {
5335     __ cmpl(Address(CpuRegister(RSP), obj.GetStackIndex()), Immediate(0));
5336   } else {
5337     DCHECK(obj.IsConstant()) << obj;
5338     DCHECK(obj.GetConstant()->IsNullConstant());
5339     __ jmp(slow_path->GetEntryLabel());
5340     return;
5341   }
5342   __ j(kEqual, slow_path->GetEntryLabel());
5343 }
5344 
5345 void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) {
5346   codegen_->GenerateNullCheck(instruction);
5347 }
5348 
5349 void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) {
5350   bool object_array_get_with_read_barrier =
5351       kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
5352   LocationSummary* locations =
5353       new (GetGraph()->GetAllocator()) LocationSummary(instruction,
5354                                                        object_array_get_with_read_barrier
5355                                                            ? LocationSummary::kCallOnSlowPath
5356                                                            : LocationSummary::kNoCall);
5357   if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
5358     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
5359   }
5360   locations->SetInAt(0, Location::RequiresRegister());
5361   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5362   if (DataType::IsFloatingPointType(instruction->GetType())) {
5363     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5364   } else {
5365     // The output overlaps for an object array get when read barriers
5366     // are enabled: we do not want the move to overwrite the array's
5367     // location, as we need it to emit the read barrier.
5368     locations->SetOut(
5369         Location::RequiresRegister(),
5370         object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
5371   }
5372 }
5373 
5374 void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
5375   LocationSummary* locations = instruction->GetLocations();
5376   Location obj_loc = locations->InAt(0);
5377   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
5378   Location index = locations->InAt(1);
5379   Location out_loc = locations->Out();
5380   uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
5381 
5382   DataType::Type type = instruction->GetType();
5383   switch (type) {
5384     case DataType::Type::kBool:
5385     case DataType::Type::kUint8: {
5386       CpuRegister out = out_loc.AsRegister<CpuRegister>();
5387       __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
5388       break;
5389     }
5390 
5391     case DataType::Type::kInt8: {
5392       CpuRegister out = out_loc.AsRegister<CpuRegister>();
5393       __ movsxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
5394       break;
5395     }
5396 
5397     case DataType::Type::kUint16: {
5398       CpuRegister out = out_loc.AsRegister<CpuRegister>();
5399       if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
5400         // Branch cases into compressed and uncompressed for each index's type.
5401         uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
5402         NearLabel done, not_compressed;
5403         __ testb(Address(obj, count_offset), Immediate(1));
5404         codegen_->MaybeRecordImplicitNullCheck(instruction);
5405         static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
5406                       "Expecting 0=compressed, 1=uncompressed");
5407         __ j(kNotZero, &not_compressed);
5408         __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
5409         __ jmp(&done);
5410         __ Bind(&not_compressed);
5411         __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
5412         __ Bind(&done);
5413       } else {
5414         __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
5415       }
5416       break;
5417     }
5418 
5419     case DataType::Type::kInt16: {
5420       CpuRegister out = out_loc.AsRegister<CpuRegister>();
5421       __ movsxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
5422       break;
5423     }
5424 
5425     case DataType::Type::kInt32: {
5426       CpuRegister out = out_loc.AsRegister<CpuRegister>();
5427       __ movl(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
5428       break;
5429     }
5430 
5431     case DataType::Type::kReference: {
5432       static_assert(
5433           sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
5434           "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
5435       // /* HeapReference<Object> */ out =
5436       //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
5437       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
5438         // Note that a potential implicit null check is handled in this
5439         // CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call.
5440         codegen_->GenerateArrayLoadWithBakerReadBarrier(
5441             instruction, out_loc, obj, data_offset, index, /* needs_null_check= */ true);
5442       } else {
5443         CpuRegister out = out_loc.AsRegister<CpuRegister>();
5444         __ movl(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
5445         codegen_->MaybeRecordImplicitNullCheck(instruction);
5446         // If read barriers are enabled, emit read barriers other than
5447         // Baker's using a slow path (and also unpoison the loaded
5448         // reference, if heap poisoning is enabled).
5449         if (index.IsConstant()) {
5450           uint32_t offset =
5451               (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
5452           codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
5453         } else {
5454           codegen_->MaybeGenerateReadBarrierSlow(
5455               instruction, out_loc, out_loc, obj_loc, data_offset, index);
5456         }
5457       }
5458       break;
5459     }
5460 
5461     case DataType::Type::kInt64: {
5462       CpuRegister out = out_loc.AsRegister<CpuRegister>();
5463       __ movq(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_8, data_offset));
5464       break;
5465     }
5466 
5467     case DataType::Type::kFloat32: {
5468       XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
5469       __ movss(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
5470       break;
5471     }
5472 
5473     case DataType::Type::kFloat64: {
5474       XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
5475       __ movsd(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_8, data_offset));
5476       break;
5477     }
5478 
5479     case DataType::Type::kUint32:
5480     case DataType::Type::kUint64:
5481     case DataType::Type::kVoid:
5482       LOG(FATAL) << "Unreachable type " << type;
5483       UNREACHABLE();
5484   }
5485 
5486   if (type == DataType::Type::kReference) {
5487     // Potential implicit null checks, in the case of reference
5488     // arrays, are handled in the previous switch statement.
5489   } else {
5490     codegen_->MaybeRecordImplicitNullCheck(instruction);
5491   }
5492 }
5493 
5494 void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) {
5495   DataType::Type value_type = instruction->GetComponentType();
5496 
5497   bool needs_write_barrier =
5498       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
5499   bool needs_type_check = instruction->NeedsTypeCheck();
5500 
5501   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5502       instruction,
5503       needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
5504 
5505   locations->SetInAt(0, Location::RequiresRegister());
5506   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5507   if (DataType::IsFloatingPointType(value_type)) {
5508     locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
5509   } else {
5510     locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
5511   }
5512 
5513   if (needs_write_barrier) {
5514     // Temporary registers for the write barrier.
5515     locations->AddTemp(Location::RequiresRegister());  // Possibly used for ref. poisoning too.
5516     locations->AddTemp(Location::RequiresRegister());
5517   }
5518 }
5519 
5520 void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
5521   LocationSummary* locations = instruction->GetLocations();
5522   Location array_loc = locations->InAt(0);
5523   CpuRegister array = array_loc.AsRegister<CpuRegister>();
5524   Location index = locations->InAt(1);
5525   Location value = locations->InAt(2);
5526   DataType::Type value_type = instruction->GetComponentType();
5527   bool needs_type_check = instruction->NeedsTypeCheck();
5528   bool needs_write_barrier =
5529       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
5530 
5531   switch (value_type) {
5532     case DataType::Type::kBool:
5533     case DataType::Type::kUint8:
5534     case DataType::Type::kInt8: {
5535       uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
5536       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_1, offset);
5537       if (value.IsRegister()) {
5538         __ movb(address, value.AsRegister<CpuRegister>());
5539       } else {
5540         __ movb(address, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
5541       }
5542       codegen_->MaybeRecordImplicitNullCheck(instruction);
5543       break;
5544     }
5545 
5546     case DataType::Type::kUint16:
5547     case DataType::Type::kInt16: {
5548       uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
5549       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_2, offset);
5550       if (value.IsRegister()) {
5551         __ movw(address, value.AsRegister<CpuRegister>());
5552       } else {
5553         DCHECK(value.IsConstant()) << value;
5554         __ movw(address, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
5555       }
5556       codegen_->MaybeRecordImplicitNullCheck(instruction);
5557       break;
5558     }
5559 
5560     case DataType::Type::kReference: {
5561       uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
5562       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
5563 
5564       if (!value.IsRegister()) {
5565         // Just setting null.
5566         DCHECK(instruction->InputAt(2)->IsNullConstant());
5567         DCHECK(value.IsConstant()) << value;
5568         __ movl(address, Immediate(0));
5569         codegen_->MaybeRecordImplicitNullCheck(instruction);
5570         DCHECK(!needs_write_barrier);
5571         DCHECK(!needs_type_check);
5572         break;
5573       }
5574 
5575       DCHECK(needs_write_barrier);
5576       CpuRegister register_value = value.AsRegister<CpuRegister>();
5577       Location temp_loc = locations->GetTemp(0);
5578       CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
5579 
5580       bool can_value_be_null = instruction->GetValueCanBeNull();
5581       NearLabel do_store;
5582       if (can_value_be_null) {
5583         __ testl(register_value, register_value);
5584         __ j(kEqual, &do_store);
5585       }
5586 
5587       SlowPathCode* slow_path = nullptr;
5588       if (needs_type_check) {
5589         slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathX86_64(instruction);
5590         codegen_->AddSlowPath(slow_path);
5591 
5592         const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
5593         const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
5594         const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
5595 
5596         // Note that when Baker read barriers are enabled, the type
5597         // checks are performed without read barriers.  This is fine,
5598         // even in the case where a class object is in the from-space
5599         // after the flip, as a comparison involving such a type would
5600         // not produce a false positive; it may of course produce a
5601         // false negative, in which case we would take the ArraySet
5602         // slow path.
5603 
5604         // /* HeapReference<Class> */ temp = array->klass_
5605         __ movl(temp, Address(array, class_offset));
5606         codegen_->MaybeRecordImplicitNullCheck(instruction);
5607         __ MaybeUnpoisonHeapReference(temp);
5608 
5609         // /* HeapReference<Class> */ temp = temp->component_type_
5610         __ movl(temp, Address(temp, component_offset));
5611         // If heap poisoning is enabled, no need to unpoison `temp`
5612         // nor the object reference in `register_value->klass`, as
5613         // we are comparing two poisoned references.
5614         __ cmpl(temp, Address(register_value, class_offset));
5615 
5616         if (instruction->StaticTypeOfArrayIsObjectArray()) {
5617           NearLabel do_put;
5618           __ j(kEqual, &do_put);
5619           // If heap poisoning is enabled, the `temp` reference has
5620           // not been unpoisoned yet; unpoison it now.
5621           __ MaybeUnpoisonHeapReference(temp);
5622 
5623           // If heap poisoning is enabled, no need to unpoison the
5624           // heap reference loaded below, as it is only used for a
5625           // comparison with null.
5626           __ cmpl(Address(temp, super_offset), Immediate(0));
5627           __ j(kNotEqual, slow_path->GetEntryLabel());
5628           __ Bind(&do_put);
5629         } else {
5630           __ j(kNotEqual, slow_path->GetEntryLabel());
5631         }
5632       }
5633 
5634       CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
5635       codegen_->MarkGCCard(
5636           temp, card, array, value.AsRegister<CpuRegister>(), /* value_can_be_null= */ false);
5637 
5638       if (can_value_be_null) {
5639         DCHECK(do_store.IsLinked());
5640         __ Bind(&do_store);
5641       }
5642 
5643       Location source = value;
5644       if (kPoisonHeapReferences) {
5645         __ movl(temp, register_value);
5646         __ PoisonHeapReference(temp);
5647         source = temp_loc;
5648       }
5649 
5650       __ movl(address, source.AsRegister<CpuRegister>());
5651 
5652       if (can_value_be_null || !needs_type_check) {
5653         codegen_->MaybeRecordImplicitNullCheck(instruction);
5654       }
5655 
5656       if (slow_path != nullptr) {
5657         __ Bind(slow_path->GetExitLabel());
5658       }
5659 
5660       break;
5661     }
5662 
5663     case DataType::Type::kInt32: {
5664       uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
5665       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
5666       if (value.IsRegister()) {
5667         __ movl(address, value.AsRegister<CpuRegister>());
5668       } else {
5669         DCHECK(value.IsConstant()) << value;
5670         int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
5671         __ movl(address, Immediate(v));
5672       }
5673       codegen_->MaybeRecordImplicitNullCheck(instruction);
5674       break;
5675     }
5676 
5677     case DataType::Type::kInt64: {
5678       uint32_t offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
5679       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset);
5680       if (value.IsRegister()) {
5681         __ movq(address, value.AsRegister<CpuRegister>());
5682         codegen_->MaybeRecordImplicitNullCheck(instruction);
5683       } else {
5684         int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
5685         Address address_high =
5686             CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t));
5687         codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
5688       }
5689       break;
5690     }
5691 
5692     case DataType::Type::kFloat32: {
5693       uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
5694       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
5695       if (value.IsFpuRegister()) {
5696         __ movss(address, value.AsFpuRegister<XmmRegister>());
5697       } else {
5698         DCHECK(value.IsConstant());
5699         int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
5700         __ movl(address, Immediate(v));
5701       }
5702       codegen_->MaybeRecordImplicitNullCheck(instruction);
5703       break;
5704     }
5705 
5706     case DataType::Type::kFloat64: {
5707       uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
5708       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset);
5709       if (value.IsFpuRegister()) {
5710         __ movsd(address, value.AsFpuRegister<XmmRegister>());
5711         codegen_->MaybeRecordImplicitNullCheck(instruction);
5712       } else {
5713         int64_t v =
5714             bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
5715         Address address_high =
5716             CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t));
5717         codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
5718       }
5719       break;
5720     }
5721 
5722     case DataType::Type::kUint32:
5723     case DataType::Type::kUint64:
5724     case DataType::Type::kVoid:
5725       LOG(FATAL) << "Unreachable type " << instruction->GetType();
5726       UNREACHABLE();
5727   }
5728 }
5729 
5730 void LocationsBuilderX86_64::VisitArrayLength(HArrayLength* instruction) {
5731   LocationSummary* locations =
5732       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5733   locations->SetInAt(0, Location::RequiresRegister());
5734   if (!instruction->IsEmittedAtUseSite()) {
5735     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5736   }
5737 }
5738 
5739 void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction) {
5740   if (instruction->IsEmittedAtUseSite()) {
5741     return;
5742   }
5743 
5744   LocationSummary* locations = instruction->GetLocations();
5745   uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
5746   CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
5747   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
5748   __ movl(out, Address(obj, offset));
5749   codegen_->MaybeRecordImplicitNullCheck(instruction);
5750   // Mask out most significant bit in case the array is String's array of char.
5751   if (mirror::kUseStringCompression && instruction->IsStringLength()) {
5752     __ shrl(out, Immediate(1));
5753   }
5754 }
5755 
5756 void LocationsBuilderX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
5757   RegisterSet caller_saves = RegisterSet::Empty();
5758   InvokeRuntimeCallingConvention calling_convention;
5759   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
5760   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
5761   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
5762   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
5763   HInstruction* length = instruction->InputAt(1);
5764   if (!length->IsEmittedAtUseSite()) {
5765     locations->SetInAt(1, Location::RegisterOrConstant(length));
5766   }
5767 }
5768 
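// Bounds checks use unsigned comparisons: after `cmpl index, length`, j(kAboveEqual)
// catches both index >= length and negative indices (which wrap to large unsigned
// values). When the length is the first operand of the compare instead, the condition
// is reversed to kBelowEqual (length <= index, unsigned).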
5769 void InstructionCodeGeneratorX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
5770   LocationSummary* locations = instruction->GetLocations();
5771   Location index_loc = locations->InAt(0);
5772   Location length_loc = locations->InAt(1);
5773   SlowPathCode* slow_path =
5774       new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathX86_64(instruction);
5775 
5776   if (length_loc.IsConstant()) {
5777     int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
5778     if (index_loc.IsConstant()) {
5779       // BCE will remove the bounds check if we are guaranteed to pass.
5780       int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
5781       if (index < 0 || index >= length) {
5782         codegen_->AddSlowPath(slow_path);
5783         __ jmp(slow_path->GetEntryLabel());
5784       } else {
5785         // Some optimization after BCE may have generated this, and we should not
5786         // generate a bounds check if it is a valid range.
5787       }
5788       return;
5789     }
5790 
5791     // We have to reverse the jump condition because the length is the constant.
5792     CpuRegister index_reg = index_loc.AsRegister<CpuRegister>();
5793     __ cmpl(index_reg, Immediate(length));
5794     codegen_->AddSlowPath(slow_path);
5795     __ j(kAboveEqual, slow_path->GetEntryLabel());
5796   } else {
5797     HInstruction* array_length = instruction->InputAt(1);
5798     if (array_length->IsEmittedAtUseSite()) {
5799       // Address the length field in the array.
5800       DCHECK(array_length->IsArrayLength());
5801       uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
5802       Location array_loc = array_length->GetLocations()->InAt(0);
5803       Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
5804       if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
5805         // TODO: if index_loc.IsConstant(), compare twice the index (to compensate for
5806         // the string compression flag) with the in-memory length and avoid the temporary.
5807         CpuRegister length_reg = CpuRegister(TMP);
5808         __ movl(length_reg, array_len);
5809         codegen_->MaybeRecordImplicitNullCheck(array_length);
5810         __ shrl(length_reg, Immediate(1));
5811         codegen_->GenerateIntCompare(length_reg, index_loc);
5812       } else {
5813         // Checking the bound for general case:
5814         // Array of char or String's array when the compression feature is off.
5815         if (index_loc.IsConstant()) {
5816           int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
5817           __ cmpl(array_len, Immediate(value));
5818         } else {
5819           __ cmpl(array_len, index_loc.AsRegister<CpuRegister>());
5820         }
5821         codegen_->MaybeRecordImplicitNullCheck(array_length);
5822       }
5823     } else {
5824       codegen_->GenerateIntCompare(length_loc, index_loc);
5825     }
5826     codegen_->AddSlowPath(slow_path);
5827     __ j(kBelowEqual, slow_path->GetEntryLabel());
5828   }
5829 }
5830 
5831 void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp,
5832                                      CpuRegister card,
5833                                      CpuRegister object,
5834                                      CpuRegister value,
5835                                      bool value_can_be_null) {
5836   NearLabel is_null;
5837   if (value_can_be_null) {
5838     __ testl(value, value);
5839     __ j(kEqual, &is_null);
5840   }
5841   // Load the address of the card table into `card`.
5842   __ gs()->movq(card, Address::Absolute(Thread::CardTableOffset<kX86_64PointerSize>().Int32Value(),
5843                                         /* no_rip= */ true));
5844   // Calculate the offset (in the card table) of the card corresponding to
5845   // `object`.
5846   __ movq(temp, object);
5847   __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift));
5848   // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
5849   // `object`'s card.
5850   //
5851   // Register `card` contains the address of the card table. Note that the card
5852   // table's base is biased during its creation so that it always starts at an
5853   // address whose least-significant byte is equal to `kCardDirty` (see
5854   // art::gc::accounting::CardTable::Create). Therefore the MOVB instruction
5855   // below writes the `kCardDirty` (byte) value into the `object`'s card
5856   // (located at `card + object >> kCardShift`).
5857   //
5858   // This dual use of the value in register `card` (1. to calculate the location
5859   // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
5860   // (no need to explicitly load `kCardDirty` as an immediate value).
5861   __ movb(Address(temp, card, TIMES_1, 0), card);
5862   if (value_can_be_null) {
5863     __ Bind(&is_null);
5864   }
5865 }
5866 
5867 void LocationsBuilderX86_64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
5868   LOG(FATAL) << "Unimplemented";
5869 }
5870 
5871 void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instruction) {
5872   if (instruction->GetNext()->IsSuspendCheck() &&
5873       instruction->GetBlock()->GetLoopInformation() != nullptr) {
5874     HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
5875     // The back edge will generate the suspend check.
5876     codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
5877   }
5878 
5879   codegen_->GetMoveResolver()->EmitNativeCode(instruction);
5880 }
5881 
5882 void LocationsBuilderX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
5883   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5884       instruction, LocationSummary::kCallOnSlowPath);
5885   // In suspend check slow path, usually there are no caller-save registers at all.
5886   // If SIMD instructions are present, however, we force spilling all live SIMD
5887   // registers in full width (since the runtime only saves/restores lower part).
5888   locations->SetCustomSlowPathCallerSaves(
5889       GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
5890 }
5891 
5892 void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
5893   HBasicBlock* block = instruction->GetBlock();
5894   if (block->GetLoopInformation() != nullptr) {
5895     DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
5896     // The back edge will generate the suspend check.
5897     return;
5898   }
5899   if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
5900     // The goto will generate the suspend check.
5901     return;
5902   }
5903   GenerateSuspendCheck(instruction, nullptr);
5904 }
5905 
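// The suspend check reads the 16-bit thread flags at a fixed GS-relative offset into
// the current Thread. A non-zero value means a suspend or checkpoint request is
// pending, so control goes to the slow path; otherwise execution falls through, or
// jumps straight to the loop successor when the check sits on a back edge.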
5906 void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruction,
5907                                                           HBasicBlock* successor) {
5908   SuspendCheckSlowPathX86_64* slow_path =
5909       down_cast<SuspendCheckSlowPathX86_64*>(instruction->GetSlowPath());
5910   if (slow_path == nullptr) {
5911     slow_path =
5912         new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathX86_64(instruction, successor);
5913     instruction->SetSlowPath(slow_path);
5914     codegen_->AddSlowPath(slow_path);
5915     if (successor != nullptr) {
5916       DCHECK(successor->IsLoopHeader());
5917     }
5918   } else {
5919     DCHECK_EQ(slow_path->GetSuccessor(), successor);
5920   }
5921 
5922   __ gs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64PointerSize>().Int32Value(),
5923                                   /* no_rip= */ true),
5924                 Immediate(0));
5925   if (successor == nullptr) {
5926     __ j(kNotEqual, slow_path->GetEntryLabel());
5927     __ Bind(slow_path->GetReturnLabel());
5928   } else {
5929     __ j(kEqual, codegen_->GetLabelOf(successor));
5930     __ jmp(slow_path->GetEntryLabel());
5931   }
5932 }
5933 
5934 X86_64Assembler* ParallelMoveResolverX86_64::GetAssembler() const {
5935   return codegen_->GetAssembler();
5936 }
5937 
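// x86-64 has no memory-to-memory move, so the stack-to-stack moves below are staged
// through the reserved TMP register. Constants are materialized with the cheapest
// encoding available (xorl for zero, a 32-bit immediate when the value fits).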
5938 void ParallelMoveResolverX86_64::EmitMove(size_t index) {
5939   MoveOperands* move = moves_[index];
5940   Location source = move->GetSource();
5941   Location destination = move->GetDestination();
5942 
5943   if (source.IsRegister()) {
5944     if (destination.IsRegister()) {
5945       __ movq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
5946     } else if (destination.IsStackSlot()) {
5947       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
5948               source.AsRegister<CpuRegister>());
5949     } else {
5950       DCHECK(destination.IsDoubleStackSlot());
5951       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
5952               source.AsRegister<CpuRegister>());
5953     }
5954   } else if (source.IsStackSlot()) {
5955     if (destination.IsRegister()) {
5956       __ movl(destination.AsRegister<CpuRegister>(),
5957               Address(CpuRegister(RSP), source.GetStackIndex()));
5958     } else if (destination.IsFpuRegister()) {
5959       __ movss(destination.AsFpuRegister<XmmRegister>(),
5960               Address(CpuRegister(RSP), source.GetStackIndex()));
5961     } else {
5962       DCHECK(destination.IsStackSlot());
5963       __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
5964       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
5965     }
5966   } else if (source.IsDoubleStackSlot()) {
5967     if (destination.IsRegister()) {
5968       __ movq(destination.AsRegister<CpuRegister>(),
5969               Address(CpuRegister(RSP), source.GetStackIndex()));
5970     } else if (destination.IsFpuRegister()) {
5971       __ movsd(destination.AsFpuRegister<XmmRegister>(),
5972                Address(CpuRegister(RSP), source.GetStackIndex()));
5973     } else {
5974       DCHECK(destination.IsDoubleStackSlot()) << destination;
5975       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
5976       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
5977     }
5978   } else if (source.IsSIMDStackSlot()) {
5979     if (destination.IsFpuRegister()) {
5980       __ movups(destination.AsFpuRegister<XmmRegister>(),
5981                 Address(CpuRegister(RSP), source.GetStackIndex()));
5982     } else {
5983       DCHECK(destination.IsSIMDStackSlot());
5984       size_t high = kX86_64WordSize;
5985       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
5986       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
5987       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex() + high));
5988       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex() + high), CpuRegister(TMP));
5989     }
5990   } else if (source.IsConstant()) {
5991     HConstant* constant = source.GetConstant();
5992     if (constant->IsIntConstant() || constant->IsNullConstant()) {
5993       int32_t value = CodeGenerator::GetInt32ValueOf(constant);
5994       if (destination.IsRegister()) {
5995         if (value == 0) {
5996           __ xorl(destination.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
5997         } else {
5998           __ movl(destination.AsRegister<CpuRegister>(), Immediate(value));
5999         }
6000       } else {
6001         DCHECK(destination.IsStackSlot()) << destination;
6002         __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
6003       }
6004     } else if (constant->IsLongConstant()) {
6005       int64_t value = constant->AsLongConstant()->GetValue();
6006       if (destination.IsRegister()) {
6007         codegen_->Load64BitValue(destination.AsRegister<CpuRegister>(), value);
6008       } else {
6009         DCHECK(destination.IsDoubleStackSlot()) << destination;
6010         codegen_->Store64BitValueToStack(destination, value);
6011       }
6012     } else if (constant->IsFloatConstant()) {
6013       float fp_value = constant->AsFloatConstant()->GetValue();
6014       if (destination.IsFpuRegister()) {
6015         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
6016         codegen_->Load32BitValue(dest, fp_value);
6017       } else {
6018         DCHECK(destination.IsStackSlot()) << destination;
6019         Immediate imm(bit_cast<int32_t, float>(fp_value));
6020         __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
6021       }
6022     } else {
6023       DCHECK(constant->IsDoubleConstant()) << constant->DebugName();
6024       double fp_value =  constant->AsDoubleConstant()->GetValue();
6025       int64_t value = bit_cast<int64_t, double>(fp_value);
6026       if (destination.IsFpuRegister()) {
6027         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
6028         codegen_->Load64BitValue(dest, fp_value);
6029       } else {
6030         DCHECK(destination.IsDoubleStackSlot()) << destination;
6031         codegen_->Store64BitValueToStack(destination, value);
6032       }
6033     }
6034   } else if (source.IsFpuRegister()) {
6035     if (destination.IsFpuRegister()) {
6036       __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
6037     } else if (destination.IsStackSlot()) {
6038       __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
6039                source.AsFpuRegister<XmmRegister>());
6040     } else if (destination.IsDoubleStackSlot()) {
6041       __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
6042                source.AsFpuRegister<XmmRegister>());
6043     } else {
6044       DCHECK(destination.IsSIMDStackSlot());
6045       __ movups(Address(CpuRegister(RSP), destination.GetStackIndex()),
6046                 source.AsFpuRegister<XmmRegister>());
6047     }
6048   }
6049 }
6050 
6051 void ParallelMoveResolverX86_64::Exchange32(CpuRegister reg, int mem) {
6052   __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
6053   __ movl(Address(CpuRegister(RSP), mem), reg);
6054   __ movl(reg, CpuRegister(TMP));
6055 }
6056 
6057 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg1, CpuRegister reg2) {
6058   __ movq(CpuRegister(TMP), reg1);
6059   __ movq(reg1, reg2);
6060   __ movq(reg2, CpuRegister(TMP));
6061 }
6062 
6063 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) {
6064   __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
6065   __ movq(Address(CpuRegister(RSP), mem), reg);
6066   __ movq(reg, CpuRegister(TMP));
6067 }
6068 
6069 void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) {
6070   __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
6071   __ movss(Address(CpuRegister(RSP), mem), reg);
6072   __ movd(reg, CpuRegister(TMP));
6073 }
6074 
6075 void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) {
6076   __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
6077   __ movsd(Address(CpuRegister(RSP), mem), reg);
6078   __ movd(reg, CpuRegister(TMP));
6079 }
6080 
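// There is no single-instruction swap between a 128-bit register and memory, so the XMM
// register is spilled to a freshly reserved 16-byte area below RSP, the two quadwords
// are swapped in memory via ExchangeMemory64, and the register is reloaded. The `mem`
// offset is rebased by `extra_slot` because RSP has moved.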
6081 void ParallelMoveResolverX86_64::Exchange128(XmmRegister reg, int mem) {
6082   size_t extra_slot = 2 * kX86_64WordSize;
6083   __ subq(CpuRegister(RSP), Immediate(extra_slot));
6084   __ movups(Address(CpuRegister(RSP), 0), XmmRegister(reg));
6085   ExchangeMemory64(0, mem + extra_slot, 2);
6086   __ movups(XmmRegister(reg), Address(CpuRegister(RSP), 0));
6087   __ addq(CpuRegister(RSP), Immediate(extra_slot));
6088 }
6089 
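// Memory-to-memory exchanges need two scratch registers: the reserved TMP plus one
// allocated by ScratchRegisterScope (which spills a register, RAX here, when none is
// free). If a spill did happen, every stack offset shifted by one word, which is what
// the `stack_offset` adjustment below accounts for.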
6090 void ParallelMoveResolverX86_64::ExchangeMemory32(int mem1, int mem2) {
6091   ScratchRegisterScope ensure_scratch(
6092       this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
6093 
6094   int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
6095   __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
6096   __ movl(CpuRegister(ensure_scratch.GetRegister()),
6097           Address(CpuRegister(RSP), mem2 + stack_offset));
6098   __ movl(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
6099   __ movl(Address(CpuRegister(RSP), mem1 + stack_offset),
6100           CpuRegister(ensure_scratch.GetRegister()));
6101 }
6102 
6103 void ParallelMoveResolverX86_64::ExchangeMemory64(int mem1, int mem2, int num_of_qwords) {
6104   ScratchRegisterScope ensure_scratch(
6105       this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
6106 
6107   int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
6108 
6109   // Now that temp registers are available (possibly spilled), exchange blocks of memory.
6110   for (int i = 0; i < num_of_qwords; i++) {
6111     __ movq(CpuRegister(TMP),
6112             Address(CpuRegister(RSP), mem1 + stack_offset));
6113     __ movq(CpuRegister(ensure_scratch.GetRegister()),
6114             Address(CpuRegister(RSP), mem2 + stack_offset));
6115     __ movq(Address(CpuRegister(RSP), mem2 + stack_offset),
6116             CpuRegister(TMP));
6117     __ movq(Address(CpuRegister(RSP), mem1 + stack_offset),
6118             CpuRegister(ensure_scratch.GetRegister()));
6119     stack_offset += kX86_64WordSize;
6120   }
6121 }
6122 
6123 void ParallelMoveResolverX86_64::EmitSwap(size_t index) {
6124   MoveOperands* move = moves_[index];
6125   Location source = move->GetSource();
6126   Location destination = move->GetDestination();
6127 
6128   if (source.IsRegister() && destination.IsRegister()) {
6129     Exchange64(source.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
6130   } else if (source.IsRegister() && destination.IsStackSlot()) {
6131     Exchange32(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
6132   } else if (source.IsStackSlot() && destination.IsRegister()) {
6133     Exchange32(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
6134   } else if (source.IsStackSlot() && destination.IsStackSlot()) {
6135     ExchangeMemory32(destination.GetStackIndex(), source.GetStackIndex());
6136   } else if (source.IsRegister() && destination.IsDoubleStackSlot()) {
6137     Exchange64(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
6138   } else if (source.IsDoubleStackSlot() && destination.IsRegister()) {
6139     Exchange64(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
6140   } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
6141     ExchangeMemory64(destination.GetStackIndex(), source.GetStackIndex(), 1);
6142   } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
6143     __ movd(CpuRegister(TMP), source.AsFpuRegister<XmmRegister>());
6144     __ movaps(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>());
6145     __ movd(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP));
6146   } else if (source.IsFpuRegister() && destination.IsStackSlot()) {
6147     Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
6148   } else if (source.IsStackSlot() && destination.IsFpuRegister()) {
6149     Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
6150   } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) {
6151     Exchange64(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
6152   } else if (source.IsDoubleStackSlot() && destination.IsFpuRegister()) {
6153     Exchange64(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
6154   } else if (source.IsSIMDStackSlot() && destination.IsSIMDStackSlot()) {
6155     ExchangeMemory64(destination.GetStackIndex(), source.GetStackIndex(), 2);
6156   } else if (source.IsFpuRegister() && destination.IsSIMDStackSlot()) {
6157     Exchange128(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
6158   } else if (destination.IsFpuRegister() && source.IsSIMDStackSlot()) {
6159     Exchange128(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
6160   } else {
6161     LOG(FATAL) << "Unimplemented swap between " << source << " and " << destination;
6162   }
6163 }
6164 
6165 
6166 void ParallelMoveResolverX86_64::SpillScratch(int reg) {
6167   __ pushq(CpuRegister(reg));
6168 }
6169 
6170 
6171 void ParallelMoveResolverX86_64::RestoreScratch(int reg) {
6172   __ popq(CpuRegister(reg));
6173 }
6174 
6175 void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck(
6176     SlowPathCode* slow_path, CpuRegister class_reg) {
6177   constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
6178   const size_t status_byte_offset =
6179       mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
6180   constexpr uint32_t shifted_visibly_initialized_value =
6181       enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte);
6182 
6183   __ cmpb(Address(class_reg, status_byte_offset), Immediate(shifted_visibly_initialized_value));
6184   __ j(kBelow, slow_path->GetEntryLabel());
6185   __ Bind(slow_path->GetExitLabel());
6186 }
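
// Illustrative sketch (not emitted verbatim) of the check above: since kVisiblyInitialized
// is the highest ClassStatus value, a single unsigned byte compare is enough:
//   cmpb [class_reg + status_byte_offset], <kVisiblyInitialized << (status_lsb_position % 8)>
//   jb   <clinit slow path>   // status below "visibly initialized": initialize on the slow path
//   <slow path exit label>    // fast path continues here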
6187 
6188 void InstructionCodeGeneratorX86_64::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
6189                                                                        CpuRegister temp) {
6190   uint32_t path_to_root = check->GetBitstringPathToRoot();
6191   uint32_t mask = check->GetBitstringMask();
6192   DCHECK(IsPowerOfTwo(mask + 1));
6193   size_t mask_bits = WhichPowerOf2(mask + 1);
6194 
6195   if (mask_bits == 16u) {
6196     // Compare the bitstring in memory.
6197     __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root));
6198   } else {
6199     // /* uint32_t */ temp = temp->status_
6200     __ movl(temp, Address(temp, mirror::Class::StatusOffset()));
6201     // Compare the bitstring bits using SUB.
6202     __ subl(temp, Immediate(path_to_root));
6203     // Shift out bits that do not contribute to the comparison.
6204     __ shll(temp, Immediate(32u - mask_bits));
6205   }
6206 }
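
// Worked example (hypothetical values) for the else-branch above: with mask = 0xFFF
// (mask_bits = 12) and path_to_root = 0x234, the sequence computes
//   temp = status_; temp -= 0x234; temp <<= (32 - 12);
// The shift discards everything above the low 12 bits, so ZF is set exactly when
// (status_ & 0xFFF) == 0x234, and the caller's subsequent j(kEqual)/j(kNotEqual) sees
// "equal" iff the object's bitstring path-to-root matches the checked type.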
6207 
6208 HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind(
6209     HLoadClass::LoadKind desired_class_load_kind) {
6210   switch (desired_class_load_kind) {
6211     case HLoadClass::LoadKind::kInvalid:
6212       LOG(FATAL) << "UNREACHABLE";
6213       UNREACHABLE();
6214     case HLoadClass::LoadKind::kReferrersClass:
6215       break;
6216     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
6217     case HLoadClass::LoadKind::kBootImageRelRo:
6218     case HLoadClass::LoadKind::kBssEntry:
6219     case HLoadClass::LoadKind::kBssEntryPublic:
6220     case HLoadClass::LoadKind::kBssEntryPackage:
6221       DCHECK(!GetCompilerOptions().IsJitCompiler());
6222       break;
6223     case HLoadClass::LoadKind::kJitBootImageAddress:
6224     case HLoadClass::LoadKind::kJitTableAddress:
6225       DCHECK(GetCompilerOptions().IsJitCompiler());
6226       break;
6227     case HLoadClass::LoadKind::kRuntimeCall:
6228       break;
6229   }
6230   return desired_class_load_kind;
6231 }
6232 
6233 void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
6234   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
6235   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
6236     // Custom calling convention: RAX serves as both input and output.
6237     CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
6238         cls,
6239         Location::RegisterLocation(RAX),
6240         Location::RegisterLocation(RAX));
6241     return;
6242   }
6243   DCHECK_EQ(cls->NeedsAccessCheck(),
6244             load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
6245                 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
6246 
6247   const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
6248   LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
6249       ? LocationSummary::kCallOnSlowPath
6250       : LocationSummary::kNoCall;
6251   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind);
6252   if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
6253     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
6254   }
6255 
6256   if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
6257     locations->SetInAt(0, Location::RequiresRegister());
6258   }
6259   locations->SetOut(Location::RequiresRegister());
6260   if (load_kind == HLoadClass::LoadKind::kBssEntry) {
6261     if (!kUseReadBarrier || kUseBakerReadBarrier) {
6262       // Rely on the type resolution and/or initialization to save everything.
6263       locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6264     } else {
6265       // For non-Baker read barrier we have a temp-clobbering call.
6266     }
6267   }
6268 }
6269 
6270 Label* CodeGeneratorX86_64::NewJitRootClassPatch(const DexFile& dex_file,
6271                                                  dex::TypeIndex type_index,
6272                                                  Handle<mirror::Class> handle) {
6273   ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
6274   // Add a patch entry and return the label.
6275   jit_class_patches_.emplace_back(&dex_file, type_index.index_);
6276   PatchInfo<Label>* info = &jit_class_patches_.back();
6277   return &info->label;
6278 }
6279 
6280 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
6281 // move.
6282 void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
6283   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
6284   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
6285     codegen_->GenerateLoadClassRuntimeCall(cls);
6286     return;
6287   }
6288   DCHECK_EQ(cls->NeedsAccessCheck(),
6289             load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
6290                 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
6291 
6292   LocationSummary* locations = cls->GetLocations();
6293   Location out_loc = locations->Out();
6294   CpuRegister out = out_loc.AsRegister<CpuRegister>();
6295 
6296   const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
6297       ? kWithoutReadBarrier
6298       : kCompilerReadBarrierOption;
6299   bool generate_null_check = false;
6300   switch (load_kind) {
6301     case HLoadClass::LoadKind::kReferrersClass: {
6302       DCHECK(!cls->CanCallRuntime());
6303       DCHECK(!cls->MustGenerateClinitCheck());
6304       // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
6305       CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
6306       GenerateGcRootFieldLoad(
6307           cls,
6308           out_loc,
6309           Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()),
6310           /* fixup_label= */ nullptr,
6311           read_barrier_option);
6312       break;
6313     }
6314     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
6315       DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
6316              codegen_->GetCompilerOptions().IsBootImageExtension());
6317       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
6318       __ leal(out,
6319               Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
6320       codegen_->RecordBootImageTypePatch(cls);
6321       break;
6322     case HLoadClass::LoadKind::kBootImageRelRo: {
6323       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
6324       __ movl(out,
6325               Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
6326       codegen_->RecordBootImageRelRoPatch(CodeGenerator::GetBootImageOffset(cls));
6327       break;
6328     }
6329     case HLoadClass::LoadKind::kBssEntry:
6330     case HLoadClass::LoadKind::kBssEntryPublic:
6331     case HLoadClass::LoadKind::kBssEntryPackage: {
6332       Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
6333                                           /* no_rip= */ false);
6334       Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls);
6335       // /* GcRoot<mirror::Class> */ out = *address  /* PC-relative */
6336       GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
6337       // No need for memory fence, thanks to the x86-64 memory model.
6338       generate_null_check = true;
6339       break;
6340     }
6341     case HLoadClass::LoadKind::kJitBootImageAddress: {
6342       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
6343       uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get());
6344       DCHECK_NE(address, 0u);
6345       __ movl(out, Immediate(static_cast<int32_t>(address)));  // Zero-extended.
6346       break;
6347     }
6348     case HLoadClass::LoadKind::kJitTableAddress: {
6349       Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
6350                                           /* no_rip= */ true);
6351       Label* fixup_label =
6352           codegen_->NewJitRootClassPatch(cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass());
6353       // /* GcRoot<mirror::Class> */ out = *address
6354       GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
6355       break;
6356     }
6357     default:
6358       LOG(FATAL) << "Unexpected load kind: " << cls->GetLoadKind();
6359       UNREACHABLE();
6360   }
6361 
6362   if (generate_null_check || cls->MustGenerateClinitCheck()) {
6363     DCHECK(cls->CanCallRuntime());
6364     SlowPathCode* slow_path =
6365         new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64(cls, cls);
6366     codegen_->AddSlowPath(slow_path);
6367     if (generate_null_check) {
6368       __ testl(out, out);
6369       __ j(kEqual, slow_path->GetEntryLabel());
6370     }
6371     if (cls->MustGenerateClinitCheck()) {
6372       GenerateClassInitializationCheck(slow_path, out);
6373     } else {
6374       __ Bind(slow_path->GetExitLabel());
6375     }
6376   }
6377 }
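
// Illustrative sketch (not emitted verbatim) of the kBssEntry fast path above, ignoring
// read-barrier instrumentation; the 32-bit displacement is a placeholder later patched to
// the type's .bss slot:
//   movl  out, [rip + <placeholder>]   // GcRoot<Class> from the .bss entry
//   testl out, out
//   jz    <LoadClassSlowPathX86_64>    // unresolved: the slow path resolves and fills the slot
// No memory fence is needed afterwards thanks to the x86-64 memory model.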
6378 
6379 void LocationsBuilderX86_64::VisitClinitCheck(HClinitCheck* check) {
6380   LocationSummary* locations =
6381       new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
6382   locations->SetInAt(0, Location::RequiresRegister());
6383   if (check->HasUses()) {
6384     locations->SetOut(Location::SameAsFirstInput());
6385   }
6386   // Rely on the type initialization to save everything we need.
6387   locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6388 }
6389 
6390 void LocationsBuilderX86_64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
6391   // Custom calling convention: RAX serves as both input and output.
6392   Location location = Location::RegisterLocation(RAX);
6393   CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location);
6394 }
6395 
6396 void InstructionCodeGeneratorX86_64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
6397   codegen_->GenerateLoadMethodHandleRuntimeCall(load);
6398 }
6399 
6400 void LocationsBuilderX86_64::VisitLoadMethodType(HLoadMethodType* load) {
6401   // Custom calling convention: RAX serves as both input and output.
6402   Location location = Location::RegisterLocation(RAX);
6403   CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location);
6404 }
6405 
6406 void InstructionCodeGeneratorX86_64::VisitLoadMethodType(HLoadMethodType* load) {
6407   codegen_->GenerateLoadMethodTypeRuntimeCall(load);
6408 }
6409 
6410 void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) {
6411   // We assume the class is not null.
6412   SlowPathCode* slow_path =
6413       new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64(check->GetLoadClass(), check);
6414   codegen_->AddSlowPath(slow_path);
6415   GenerateClassInitializationCheck(slow_path,
6416                                    check->GetLocations()->InAt(0).AsRegister<CpuRegister>());
6417 }
6418 
6419 HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind(
6420     HLoadString::LoadKind desired_string_load_kind) {
6421   switch (desired_string_load_kind) {
6422     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
6423     case HLoadString::LoadKind::kBootImageRelRo:
6424     case HLoadString::LoadKind::kBssEntry:
6425       DCHECK(!GetCompilerOptions().IsJitCompiler());
6426       break;
6427     case HLoadString::LoadKind::kJitBootImageAddress:
6428     case HLoadString::LoadKind::kJitTableAddress:
6429       DCHECK(GetCompilerOptions().IsJitCompiler());
6430       break;
6431     case HLoadString::LoadKind::kRuntimeCall:
6432       break;
6433   }
6434   return desired_string_load_kind;
6435 }
6436 
6437 void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) {
6438   LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
6439   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
6440   if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) {
6441     locations->SetOut(Location::RegisterLocation(RAX));
6442   } else {
6443     locations->SetOut(Location::RequiresRegister());
6444     if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
6445       if (!kUseReadBarrier || kUseBakerReadBarrier) {
6446         // Rely on the pResolveString to save everything.
6447         locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6448       } else {
6449         // For non-Baker read barrier we have a temp-clobbering call.
6450       }
6451     }
6452   }
6453 }
6454 
6455 Label* CodeGeneratorX86_64::NewJitRootStringPatch(const DexFile& dex_file,
6456                                                   dex::StringIndex string_index,
6457                                                   Handle<mirror::String> handle) {
6458   ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
6459   // Add a patch entry and return the label.
6460   jit_string_patches_.emplace_back(&dex_file, string_index.index_);
6461   PatchInfo<Label>* info = &jit_string_patches_.back();
6462   return &info->label;
6463 }
6464 
6465 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
6466 // move.
6467 void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
6468   LocationSummary* locations = load->GetLocations();
6469   Location out_loc = locations->Out();
6470   CpuRegister out = out_loc.AsRegister<CpuRegister>();
6471 
6472   switch (load->GetLoadKind()) {
6473     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
6474       DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
6475              codegen_->GetCompilerOptions().IsBootImageExtension());
6476       __ leal(out,
6477               Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
6478       codegen_->RecordBootImageStringPatch(load);
6479       return;
6480     }
6481     case HLoadString::LoadKind::kBootImageRelRo: {
6482       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
6483       __ movl(out,
6484               Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
6485       codegen_->RecordBootImageRelRoPatch(CodeGenerator::GetBootImageOffset(load));
6486       return;
6487     }
6488     case HLoadString::LoadKind::kBssEntry: {
6489       Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
6490                                           /* no_rip= */ false);
6491       Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
6492       // /* GcRoot<mirror::String> */ out = *address  /* PC-relative */
6493       GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
6494       // No need for memory fence, thanks to the x86-64 memory model.
6495       SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86_64(load);
6496       codegen_->AddSlowPath(slow_path);
6497       __ testl(out, out);
6498       __ j(kEqual, slow_path->GetEntryLabel());
6499       __ Bind(slow_path->GetExitLabel());
6500       return;
6501     }
6502     case HLoadString::LoadKind::kJitBootImageAddress: {
6503       uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get());
6504       DCHECK_NE(address, 0u);
6505       __ movl(out, Immediate(static_cast<int32_t>(address)));  // Zero-extended.
6506       return;
6507     }
6508     case HLoadString::LoadKind::kJitTableAddress: {
6509       Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
6510                                           /* no_rip= */ true);
6511       Label* fixup_label = codegen_->NewJitRootStringPatch(
6512           load->GetDexFile(), load->GetStringIndex(), load->GetString());
6513       // /* GcRoot<mirror::String> */ out = *address
6514       GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
6515       return;
6516     }
6517     default:
6518       break;
6519   }
6520 
6521   // TODO: Re-add the compiler code to do the string dex cache lookup.
6522   // Custom calling convention: RAX serves as both input and output.
6523   __ movl(CpuRegister(RAX), Immediate(load->GetStringIndex().index_));
6524   codegen_->InvokeRuntime(kQuickResolveString,
6525                           load,
6526                           load->GetDexPc());
6527   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
6528 }
6529 
6530 static Address GetExceptionTlsAddress() {
6531   return Address::Absolute(Thread::ExceptionOffset<kX86_64PointerSize>().Int32Value(),
6532                            /* no_rip= */ true);
6533 }
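
// Illustrative sketch (not emitted verbatim): thread-local state is addressed through the
// %gs segment on x86-64, so the exception load/clear below come out roughly as:
//   gs: movl out, [Thread::ExceptionOffset]   // VisitLoadException
//   gs: movl [Thread::ExceptionOffset], 0     // VisitClearException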
6534 
6535 void LocationsBuilderX86_64::VisitLoadException(HLoadException* load) {
6536   LocationSummary* locations =
6537       new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall);
6538   locations->SetOut(Location::RequiresRegister());
6539 }
6540 
6541 void InstructionCodeGeneratorX86_64::VisitLoadException(HLoadException* load) {
6542   __ gs()->movl(load->GetLocations()->Out().AsRegister<CpuRegister>(), GetExceptionTlsAddress());
6543 }
6544 
6545 void LocationsBuilderX86_64::VisitClearException(HClearException* clear) {
6546   new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
6547 }
6548 
6549 void InstructionCodeGeneratorX86_64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
6550   __ gs()->movl(GetExceptionTlsAddress(), Immediate(0));
6551 }
6552 
6553 void LocationsBuilderX86_64::VisitThrow(HThrow* instruction) {
6554   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6555       instruction, LocationSummary::kCallOnMainOnly);
6556   InvokeRuntimeCallingConvention calling_convention;
6557   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6558 }
6559 
6560 void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) {
6561   codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
6562   CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
6563 }
6564 
6565 // Temp is used for read barrier.
6566 static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
6567   if (kEmitCompilerReadBarrier &&
6568       !kUseBakerReadBarrier &&
6569       (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
6570        type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
6571        type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
6572     return 1;
6573   }
6574   return 0;
6575 }
6576 
6577 // The interface check uses 2 temps: one holds the number of interfaces and the other the
6578 // current interface pointer; the current interface is compared directly in memory.
6579 // The other checks have one temp for loading the object's class.
6580 static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
6581   if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
6582     return 2;
6583   }
6584   return 1 + NumberOfInstanceOfTemps(type_check_kind);
6585 }
6586 
6587 void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
6588   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
6589   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6590   bool baker_read_barrier_slow_path = false;
6591   switch (type_check_kind) {
6592     case TypeCheckKind::kExactCheck:
6593     case TypeCheckKind::kAbstractClassCheck:
6594     case TypeCheckKind::kClassHierarchyCheck:
6595     case TypeCheckKind::kArrayObjectCheck: {
6596       bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction);
6597       call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
6598       baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier;
6599       break;
6600     }
6601     case TypeCheckKind::kArrayCheck:
6602     case TypeCheckKind::kUnresolvedCheck:
6603     case TypeCheckKind::kInterfaceCheck:
6604       call_kind = LocationSummary::kCallOnSlowPath;
6605       break;
6606     case TypeCheckKind::kBitstringCheck:
6607       break;
6608   }
6609 
6610   LocationSummary* locations =
6611       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
6612   if (baker_read_barrier_slow_path) {
6613     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
6614   }
6615   locations->SetInAt(0, Location::RequiresRegister());
6616   if (type_check_kind == TypeCheckKind::kBitstringCheck) {
6617     locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
6618     locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
6619     locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
6620   } else {
6621     locations->SetInAt(1, Location::Any());
6622   }
6623   // Note that TypeCheckSlowPathX86_64 uses this "out" register too.
6624   locations->SetOut(Location::RequiresRegister());
6625   locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
6626 }
6627 
6628 void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
6629   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6630   LocationSummary* locations = instruction->GetLocations();
6631   Location obj_loc = locations->InAt(0);
6632   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
6633   Location cls = locations->InAt(1);
6634   Location out_loc = locations->Out();
6635   CpuRegister out = out_loc.AsRegister<CpuRegister>();
6636   const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
6637   DCHECK_LE(num_temps, 1u);
6638   Location maybe_temp_loc = (num_temps >= 1u) ? locations->GetTemp(0) : Location::NoLocation();
6639   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
6640   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
6641   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
6642   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
6643   SlowPathCode* slow_path = nullptr;
6644   NearLabel done, zero;
6645 
6646   // Return 0 if `obj` is null.
6647   // Avoid null check if we know obj is not null.
6648   if (instruction->MustDoNullCheck()) {
6649     __ testl(obj, obj);
6650     __ j(kEqual, &zero);
6651   }
6652 
6653   switch (type_check_kind) {
6654     case TypeCheckKind::kExactCheck: {
6655       ReadBarrierOption read_barrier_option =
6656           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
6657       // /* HeapReference<Class> */ out = obj->klass_
6658       GenerateReferenceLoadTwoRegisters(instruction,
6659                                         out_loc,
6660                                         obj_loc,
6661                                         class_offset,
6662                                         read_barrier_option);
6663       if (cls.IsRegister()) {
6664         __ cmpl(out, cls.AsRegister<CpuRegister>());
6665       } else {
6666         DCHECK(cls.IsStackSlot()) << cls;
6667         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
6668       }
6669       if (zero.IsLinked()) {
6670         // Classes must be equal for the instanceof to succeed.
6671         __ j(kNotEqual, &zero);
6672         __ movl(out, Immediate(1));
6673         __ jmp(&done);
6674       } else {
6675         __ setcc(kEqual, out);
6676         // setcc only sets the low byte.
6677         __ andl(out, Immediate(1));
6678       }
6679       break;
6680     }
6681 
6682     case TypeCheckKind::kAbstractClassCheck: {
6683       ReadBarrierOption read_barrier_option =
6684           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
6685       // /* HeapReference<Class> */ out = obj->klass_
6686       GenerateReferenceLoadTwoRegisters(instruction,
6687                                         out_loc,
6688                                         obj_loc,
6689                                         class_offset,
6690                                         read_barrier_option);
6691       // If the class is abstract, we eagerly fetch the super class of the
6692       // object to avoid doing a comparison we know will fail.
6693       NearLabel loop, success;
6694       __ Bind(&loop);
6695       // /* HeapReference<Class> */ out = out->super_class_
6696       GenerateReferenceLoadOneRegister(instruction,
6697                                        out_loc,
6698                                        super_offset,
6699                                        maybe_temp_loc,
6700                                        read_barrier_option);
6701       __ testl(out, out);
6702       // If `out` is null, we use it for the result, and jump to `done`.
6703       __ j(kEqual, &done);
6704       if (cls.IsRegister()) {
6705         __ cmpl(out, cls.AsRegister<CpuRegister>());
6706       } else {
6707         DCHECK(cls.IsStackSlot()) << cls;
6708         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
6709       }
6710       __ j(kNotEqual, &loop);
6711       __ movl(out, Immediate(1));
6712       if (zero.IsLinked()) {
6713         __ jmp(&done);
6714       }
6715       break;
6716     }
6717 
6718     case TypeCheckKind::kClassHierarchyCheck: {
6719       ReadBarrierOption read_barrier_option =
6720           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
6721       // /* HeapReference<Class> */ out = obj->klass_
6722       GenerateReferenceLoadTwoRegisters(instruction,
6723                                         out_loc,
6724                                         obj_loc,
6725                                         class_offset,
6726                                         read_barrier_option);
6727       // Walk over the class hierarchy to find a match.
6728       NearLabel loop, success;
6729       __ Bind(&loop);
6730       if (cls.IsRegister()) {
6731         __ cmpl(out, cls.AsRegister<CpuRegister>());
6732       } else {
6733         DCHECK(cls.IsStackSlot()) << cls;
6734         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
6735       }
6736       __ j(kEqual, &success);
6737       // /* HeapReference<Class> */ out = out->super_class_
6738       GenerateReferenceLoadOneRegister(instruction,
6739                                        out_loc,
6740                                        super_offset,
6741                                        maybe_temp_loc,
6742                                        read_barrier_option);
6743       __ testl(out, out);
6744       __ j(kNotEqual, &loop);
6745       // If `out` is null, we use it for the result, and jump to `done`.
6746       __ jmp(&done);
6747       __ Bind(&success);
6748       __ movl(out, Immediate(1));
6749       if (zero.IsLinked()) {
6750         __ jmp(&done);
6751       }
6752       break;
6753     }
6754 
6755     case TypeCheckKind::kArrayObjectCheck: {
6756       ReadBarrierOption read_barrier_option =
6757           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
6758       // /* HeapReference<Class> */ out = obj->klass_
6759       GenerateReferenceLoadTwoRegisters(instruction,
6760                                         out_loc,
6761                                         obj_loc,
6762                                         class_offset,
6763                                         read_barrier_option);
6764       // Do an exact check.
6765       NearLabel exact_check;
6766       if (cls.IsRegister()) {
6767         __ cmpl(out, cls.AsRegister<CpuRegister>());
6768       } else {
6769         DCHECK(cls.IsStackSlot()) << cls;
6770         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
6771       }
6772       __ j(kEqual, &exact_check);
6773       // Otherwise, we need to check that the object's class is a non-primitive array.
6774       // /* HeapReference<Class> */ out = out->component_type_
6775       GenerateReferenceLoadOneRegister(instruction,
6776                                        out_loc,
6777                                        component_offset,
6778                                        maybe_temp_loc,
6779                                        read_barrier_option);
6780       __ testl(out, out);
6781       // If `out` is null, we use it for the result, and jump to `done`.
6782       __ j(kEqual, &done);
6783       __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot));
6784       __ j(kNotEqual, &zero);
6785       __ Bind(&exact_check);
6786       __ movl(out, Immediate(1));
6787       __ jmp(&done);
6788       break;
6789     }
6790 
6791     case TypeCheckKind::kArrayCheck: {
6792       // No read barrier since the slow path will retry upon failure.
6793       // /* HeapReference<Class> */ out = obj->klass_
6794       GenerateReferenceLoadTwoRegisters(instruction,
6795                                         out_loc,
6796                                         obj_loc,
6797                                         class_offset,
6798                                         kWithoutReadBarrier);
6799       if (cls.IsRegister()) {
6800         __ cmpl(out, cls.AsRegister<CpuRegister>());
6801       } else {
6802         DCHECK(cls.IsStackSlot()) << cls;
6803         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
6804       }
6805       DCHECK(locations->OnlyCallsOnSlowPath());
6806       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
6807           instruction, /* is_fatal= */ false);
6808       codegen_->AddSlowPath(slow_path);
6809       __ j(kNotEqual, slow_path->GetEntryLabel());
6810       __ movl(out, Immediate(1));
6811       if (zero.IsLinked()) {
6812         __ jmp(&done);
6813       }
6814       break;
6815     }
6816 
6817     case TypeCheckKind::kUnresolvedCheck:
6818     case TypeCheckKind::kInterfaceCheck: {
6819       // Note that we indeed only call on slow path, but we always go
6820       // into the slow path for the unresolved and interface check
6821       // cases.
6822       //
6823       // We cannot directly call the InstanceofNonTrivial runtime
6824       // entry point without resorting to a type checking slow path
6825       // here (i.e. by calling InvokeRuntime directly), as it would
6826       // require assigning fixed registers for the inputs of this
6827       // HInstanceOf instruction (following the runtime calling
6828       // convention), which might be cluttered by the potential first
6829       // read barrier emission at the beginning of this method.
6830       //
6831       // TODO: Introduce a new runtime entry point taking the object
6832       // to test (instead of its class) as argument, and let it deal
6833       // with the read barrier issues. This will let us refactor this
6834       // case of the `switch` code as it was previously (with a direct
6835       // call to the runtime not using a type checking slow path).
6836       // This should also be beneficial for the other cases above.
6837       DCHECK(locations->OnlyCallsOnSlowPath());
6838       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
6839           instruction, /* is_fatal= */ false);
6840       codegen_->AddSlowPath(slow_path);
6841       __ jmp(slow_path->GetEntryLabel());
6842       if (zero.IsLinked()) {
6843         __ jmp(&done);
6844       }
6845       break;
6846     }
6847 
6848     case TypeCheckKind::kBitstringCheck: {
6849       // /* HeapReference<Class> */ temp = obj->klass_
6850       GenerateReferenceLoadTwoRegisters(instruction,
6851                                         out_loc,
6852                                         obj_loc,
6853                                         class_offset,
6854                                         kWithoutReadBarrier);
6855 
6856       GenerateBitstringTypeCheckCompare(instruction, out);
6857       if (zero.IsLinked()) {
6858         __ j(kNotEqual, &zero);
6859         __ movl(out, Immediate(1));
6860         __ jmp(&done);
6861       } else {
6862         __ setcc(kEqual, out);
6863         // setcc only sets the low byte.
6864         __ andl(out, Immediate(1));
6865       }
6866       break;
6867     }
6868   }
6869 
6870   if (zero.IsLinked()) {
6871     __ Bind(&zero);
6872     __ xorl(out, out);
6873   }
6874 
6875   if (done.IsLinked()) {
6876     __ Bind(&done);
6877   }
6878 
6879   if (slow_path != nullptr) {
6880     __ Bind(slow_path->GetExitLabel());
6881   }
6882 }
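
// Illustrative sketch (not emitted verbatim) of the kExactCheck fast path above, assuming
// no null check is needed, `cls` is in a register, and read barriers are disabled:
//   movl out, [obj + class_offset]   // out = obj->klass_
//   cmpl out, cls
//   sete out                         // setcc only writes the low byte...
//   andl out, 1                      // ...so clear the upper bits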
6883 
6884 void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) {
6885   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6886   LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction);
6887   LocationSummary* locations =
6888       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
6889   locations->SetInAt(0, Location::RequiresRegister());
6890   if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
6891     // Require a register for the interface check since there is a loop that compares the class to
6892     // a memory address.
6893     locations->SetInAt(1, Location::RequiresRegister());
6894   } else if (type_check_kind == TypeCheckKind::kBitstringCheck) {
6895     locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
6896     locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
6897     locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
6898   } else {
6899     locations->SetInAt(1, Location::Any());
6900   }
6901   // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathX86_64.
6902   locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
6903 }
6904 
6905 void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
6906   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6907   LocationSummary* locations = instruction->GetLocations();
6908   Location obj_loc = locations->InAt(0);
6909   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
6910   Location cls = locations->InAt(1);
6911   Location temp_loc = locations->GetTemp(0);
6912   CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
6913   const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
6914   DCHECK_GE(num_temps, 1u);
6915   DCHECK_LE(num_temps, 2u);
6916   Location maybe_temp2_loc = (num_temps >= 2u) ? locations->GetTemp(1) : Location::NoLocation();
6917   const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
6918   const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
6919   const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
6920   const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
6921   const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
6922   const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
6923   const uint32_t object_array_data_offset =
6924       mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
6925 
6926   bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction);
6927   SlowPathCode* type_check_slow_path =
6928       new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
6929           instruction, is_type_check_slow_path_fatal);
6930   codegen_->AddSlowPath(type_check_slow_path);
6931 
6932 
6933   NearLabel done;
6934   // Avoid null check if we know obj is not null.
6935   if (instruction->MustDoNullCheck()) {
6936     __ testl(obj, obj);
6937     __ j(kEqual, &done);
6938   }
6939 
6940   switch (type_check_kind) {
6941     case TypeCheckKind::kExactCheck:
6942     case TypeCheckKind::kArrayCheck: {
6943       // /* HeapReference<Class> */ temp = obj->klass_
6944       GenerateReferenceLoadTwoRegisters(instruction,
6945                                         temp_loc,
6946                                         obj_loc,
6947                                         class_offset,
6948                                         kWithoutReadBarrier);
6949       if (cls.IsRegister()) {
6950         __ cmpl(temp, cls.AsRegister<CpuRegister>());
6951       } else {
6952         DCHECK(cls.IsStackSlot()) << cls;
6953         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
6954       }
6955       // Jump to slow path for throwing the exception or doing a
6956       // more involved array check.
6957       __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
6958       break;
6959     }
6960 
6961     case TypeCheckKind::kAbstractClassCheck: {
6962       // /* HeapReference<Class> */ temp = obj->klass_
6963       GenerateReferenceLoadTwoRegisters(instruction,
6964                                         temp_loc,
6965                                         obj_loc,
6966                                         class_offset,
6967                                         kWithoutReadBarrier);
6968       // If the class is abstract, we eagerly fetch the super class of the
6969       // object to avoid doing a comparison we know will fail.
6970       NearLabel loop;
6971       __ Bind(&loop);
6972       // /* HeapReference<Class> */ temp = temp->super_class_
6973       GenerateReferenceLoadOneRegister(instruction,
6974                                        temp_loc,
6975                                        super_offset,
6976                                        maybe_temp2_loc,
6977                                        kWithoutReadBarrier);
6978 
6979       // If the class reference currently in `temp` is null, jump to the slow path to throw the
6980       // exception.
6981       __ testl(temp, temp);
6982       // Otherwise, compare the classes.
6983       __ j(kZero, type_check_slow_path->GetEntryLabel());
6984       if (cls.IsRegister()) {
6985         __ cmpl(temp, cls.AsRegister<CpuRegister>());
6986       } else {
6987         DCHECK(cls.IsStackSlot()) << cls;
6988         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
6989       }
6990       __ j(kNotEqual, &loop);
6991       break;
6992     }
6993 
6994     case TypeCheckKind::kClassHierarchyCheck: {
6995       // /* HeapReference<Class> */ temp = obj->klass_
6996       GenerateReferenceLoadTwoRegisters(instruction,
6997                                         temp_loc,
6998                                         obj_loc,
6999                                         class_offset,
7000                                         kWithoutReadBarrier);
7001       // Walk over the class hierarchy to find a match.
7002       NearLabel loop;
7003       __ Bind(&loop);
7004       if (cls.IsRegister()) {
7005         __ cmpl(temp, cls.AsRegister<CpuRegister>());
7006       } else {
7007         DCHECK(cls.IsStackSlot()) << cls;
7008         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
7009       }
7010       __ j(kEqual, &done);
7011 
7012       // /* HeapReference<Class> */ temp = temp->super_class_
7013       GenerateReferenceLoadOneRegister(instruction,
7014                                        temp_loc,
7015                                        super_offset,
7016                                        maybe_temp2_loc,
7017                                        kWithoutReadBarrier);
7018 
7019       // If the class reference currently in `temp` is not null, jump
7020       // back to the beginning of the loop.
7021       __ testl(temp, temp);
7022       __ j(kNotZero, &loop);
7023       // Otherwise, jump to the slow path to throw the exception.
7024       __ jmp(type_check_slow_path->GetEntryLabel());
7025       break;
7026     }
7027 
7028     case TypeCheckKind::kArrayObjectCheck: {
7029       // /* HeapReference<Class> */ temp = obj->klass_
7030       GenerateReferenceLoadTwoRegisters(instruction,
7031                                         temp_loc,
7032                                         obj_loc,
7033                                         class_offset,
7034                                         kWithoutReadBarrier);
7035       // Do an exact check.
7036       NearLabel check_non_primitive_component_type;
7037       if (cls.IsRegister()) {
7038         __ cmpl(temp, cls.AsRegister<CpuRegister>());
7039       } else {
7040         DCHECK(cls.IsStackSlot()) << cls;
7041         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
7042       }
7043       __ j(kEqual, &done);
7044 
7045       // Otherwise, we need to check that the object's class is a non-primitive array.
7046       // /* HeapReference<Class> */ temp = temp->component_type_
7047       GenerateReferenceLoadOneRegister(instruction,
7048                                        temp_loc,
7049                                        component_offset,
7050                                        maybe_temp2_loc,
7051                                        kWithoutReadBarrier);
7052 
7053       // If the component type is not null (i.e. the object is indeed
7054       // an array), jump to label `check_non_primitive_component_type`
7055       // to further check that this component type is not a primitive
7056       // type.
7057       __ testl(temp, temp);
7058       // Otherwise, jump to the slow path to throw the exception.
7059       __ j(kZero, type_check_slow_path->GetEntryLabel());
7060       __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
7061       __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
7062       break;
7063     }
7064 
7065     case TypeCheckKind::kUnresolvedCheck: {
7066       // We always go into the type check slow path for the unresolved case.
7067       //
7068       // We cannot directly call the CheckCast runtime entry point
7069       // without resorting to a type checking slow path here (i.e. by
7070       // calling InvokeRuntime directly), as it would require
7071       // assigning fixed registers for the inputs of this HCheckCast
7072       // instruction (following the runtime calling convention), which
7073       // might be cluttered by the potential first read barrier
7074       // emission at the beginning of this method.
7075       __ jmp(type_check_slow_path->GetEntryLabel());
7076       break;
7077     }
7078 
7079     case TypeCheckKind::kInterfaceCheck: {
7080       // Fast path for the interface check. Try to avoid read barriers to improve the fast path.
7081       // We cannot get false positives by doing this.
7082       // /* HeapReference<Class> */ temp = obj->klass_
7083       GenerateReferenceLoadTwoRegisters(instruction,
7084                                         temp_loc,
7085                                         obj_loc,
7086                                         class_offset,
7087                                         kWithoutReadBarrier);
7088 
7089       // /* HeapReference<Class> */ temp = temp->iftable_
7090       GenerateReferenceLoadTwoRegisters(instruction,
7091                                         temp_loc,
7092                                         temp_loc,
7093                                         iftable_offset,
7094                                         kWithoutReadBarrier);
7095       // Iftable is never null.
7096       __ movl(maybe_temp2_loc.AsRegister<CpuRegister>(), Address(temp, array_length_offset));
7097       // Maybe poison the `cls` for direct comparison with memory.
7098       __ MaybePoisonHeapReference(cls.AsRegister<CpuRegister>());
7099       // Loop through the iftable and check if any class matches.
7100       NearLabel start_loop;
7101       __ Bind(&start_loop);
7102       // Need to subtract first to handle the empty array case.
7103       __ subl(maybe_temp2_loc.AsRegister<CpuRegister>(), Immediate(2));
7104       __ j(kNegative, type_check_slow_path->GetEntryLabel());
7105       // Go to next interface if the classes do not match.
7106       __ cmpl(cls.AsRegister<CpuRegister>(),
7107               CodeGeneratorX86_64::ArrayAddress(temp,
7108                                                 maybe_temp2_loc,
7109                                                 TIMES_4,
7110                                                 object_array_data_offset));
7111       __ j(kNotEqual, &start_loop);  // Loop again if the classes differ; fall through on a match.
7112       // If `cls` was poisoned above, unpoison it.
7113       __ MaybeUnpoisonHeapReference(cls.AsRegister<CpuRegister>());
7114       break;
7115     }
7116 
7117     case TypeCheckKind::kBitstringCheck: {
7118       // /* HeapReference<Class> */ temp = obj->klass_
7119       GenerateReferenceLoadTwoRegisters(instruction,
7120                                         temp_loc,
7121                                         obj_loc,
7122                                         class_offset,
7123                                         kWithoutReadBarrier);
7124 
7125       GenerateBitstringTypeCheckCompare(instruction, temp);
7126       __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
7127       break;
7128     }
7129   }
7130 
7131   if (done.IsLinked()) {
7132     __ Bind(&done);
7133   }
7134 
7135   __ Bind(type_check_slow_path->GetExitLabel());
7136 }
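
// Illustrative sketch (not emitted verbatim) of the kInterfaceCheck loop above, with temp2
// standing in for maybe_temp2_loc and heap poisoning ignored:
//   movl temp, [obj + class_offset]           // temp = obj->klass_
//   movl temp, [temp + iftable_offset]        // temp = klass->iftable_ (never null)
//   movl temp2, [temp + array_length_offset]  // iftable length (2 slots per interface)
// start_loop:
//   subl temp2, 2                             // step back one (interface, method array) pair
//   js   <TypeCheckSlowPathX86_64>            // exhausted: no interface matched
//   cmpl cls, [temp + temp2*4 + object_array_data_offset]
//   jne  start_loop                           // no match yet, try the next entry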
7137 
7138 void LocationsBuilderX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
7139   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
7140       instruction, LocationSummary::kCallOnMainOnly);
7141   InvokeRuntimeCallingConvention calling_convention;
7142   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
7143 }
7144 
7145 void InstructionCodeGeneratorX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
7146   codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
7147                           instruction,
7148                           instruction->GetDexPc());
7149   if (instruction->IsEnter()) {
7150     CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
7151   } else {
7152     CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
7153   }
7154 }
7155 
7156 void LocationsBuilderX86_64::VisitX86AndNot(HX86AndNot* instruction) {
7157   DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
7158   DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType();
7159   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
7160   locations->SetInAt(0, Location::RequiresRegister());
7161   // There is no immediate variant of ANDN (negated bitwise AND) on x86.
7162   locations->SetInAt(1, Location::RequiresRegister());
7163   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
7164 }
7165 
7166 void LocationsBuilderX86_64::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) {
7167   DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
7168   DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType();
7169   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
7170   locations->SetInAt(0, Location::RequiresRegister());
7171   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
7172 }
7173 
7174 void InstructionCodeGeneratorX86_64::VisitX86AndNot(HX86AndNot* instruction) {
7175   LocationSummary* locations = instruction->GetLocations();
7176   Location first = locations->InAt(0);
7177   Location second = locations->InAt(1);
7178   Location dest = locations->Out();
7179   __ andn(dest.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
7180 }
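
// For reference (hypothetical bit patterns): the BMI ANDN instruction emitted above computes
// dest = ~src1 & src2, e.g. src1 = 0b1100 and src2 = 0b1010 yields 0b0010. ANDN has no
// immediate form, which is why both inputs are required to be in registers above.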
7181 
7182 void InstructionCodeGeneratorX86_64::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) {
7183   LocationSummary* locations = instruction->GetLocations();
7184   Location src = locations->InAt(0);
7185   Location dest = locations->Out();
7186   switch (instruction->GetOpKind()) {
7187     case HInstruction::kAnd:
7188       __ blsr(dest.AsRegister<CpuRegister>(), src.AsRegister<CpuRegister>());
7189       break;
7190     case HInstruction::kXor:
7191       __ blsmsk(dest.AsRegister<CpuRegister>(), src.AsRegister<CpuRegister>());
7192       break;
7193     default:
7194       LOG(FATAL) << "Unreachable";
7195   }
7196 }
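
// Worked example (hypothetical value): for src = 0b10100,
//   blsr   computes src & (src - 1) = 0b10000  (resets the lowest set bit; the kAnd case)
//   blsmsk computes src ^ (src - 1) = 0b00111  (mask up to and including the lowest set bit;
//                                               the kXor case)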
7197 
7198 void LocationsBuilderX86_64::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
7199 void LocationsBuilderX86_64::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); }
7200 void LocationsBuilderX86_64::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); }
7201 
7202 void LocationsBuilderX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
7203   LocationSummary* locations =
7204       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
7205   DCHECK(instruction->GetResultType() == DataType::Type::kInt32
7206          || instruction->GetResultType() == DataType::Type::kInt64);
7207   locations->SetInAt(0, Location::RequiresRegister());
7208   locations->SetInAt(1, Location::Any());
7209   locations->SetOut(Location::SameAsFirstInput());
7210 }
7211 
7212 void InstructionCodeGeneratorX86_64::VisitAnd(HAnd* instruction) {
7213   HandleBitwiseOperation(instruction);
7214 }
7215 
7216 void InstructionCodeGeneratorX86_64::VisitOr(HOr* instruction) {
7217   HandleBitwiseOperation(instruction);
7218 }
7219 
7220 void InstructionCodeGeneratorX86_64::VisitXor(HXor* instruction) {
7221   HandleBitwiseOperation(instruction);
7222 }
7223 
7224 void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
7225   LocationSummary* locations = instruction->GetLocations();
7226   Location first = locations->InAt(0);
7227   Location second = locations->InAt(1);
7228   DCHECK(first.Equals(locations->Out()));
7229 
7230   if (instruction->GetResultType() == DataType::Type::kInt32) {
7231     if (second.IsRegister()) {
7232       if (instruction->IsAnd()) {
7233         __ andl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
7234       } else if (instruction->IsOr()) {
7235         __ orl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
7236       } else {
7237         DCHECK(instruction->IsXor());
7238         __ xorl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
7239       }
7240     } else if (second.IsConstant()) {
7241       Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
7242       if (instruction->IsAnd()) {
7243         __ andl(first.AsRegister<CpuRegister>(), imm);
7244       } else if (instruction->IsOr()) {
7245         __ orl(first.AsRegister<CpuRegister>(), imm);
7246       } else {
7247         DCHECK(instruction->IsXor());
7248         __ xorl(first.AsRegister<CpuRegister>(), imm);
7249       }
7250     } else {
7251       Address address(CpuRegister(RSP), second.GetStackIndex());
7252       if (instruction->IsAnd()) {
7253         __ andl(first.AsRegister<CpuRegister>(), address);
7254       } else if (instruction->IsOr()) {
7255         __ orl(first.AsRegister<CpuRegister>(), address);
7256       } else {
7257         DCHECK(instruction->IsXor());
7258         __ xorl(first.AsRegister<CpuRegister>(), address);
7259       }
7260     }
7261   } else {
7262     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
7263     CpuRegister first_reg = first.AsRegister<CpuRegister>();
7264     bool second_is_constant = false;
7265     int64_t value = 0;
7266     if (second.IsConstant()) {
7267       second_is_constant = true;
7268       value = second.GetConstant()->AsLongConstant()->GetValue();
7269     }
7270     bool is_int32_value = IsInt<32>(value);
7271 
7272     if (instruction->IsAnd()) {
7273       if (second_is_constant) {
7274         if (is_int32_value) {
7275           __ andq(first_reg, Immediate(static_cast<int32_t>(value)));
7276         } else {
7277           __ andq(first_reg, codegen_->LiteralInt64Address(value));
7278         }
7279       } else if (second.IsDoubleStackSlot()) {
7280         __ andq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
7281       } else {
7282         __ andq(first_reg, second.AsRegister<CpuRegister>());
7283       }
7284     } else if (instruction->IsOr()) {
7285       if (second_is_constant) {
7286         if (is_int32_value) {
7287           __ orq(first_reg, Immediate(static_cast<int32_t>(value)));
7288         } else {
7289           __ orq(first_reg, codegen_->LiteralInt64Address(value));
7290         }
7291       } else if (second.IsDoubleStackSlot()) {
7292         __ orq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
7293       } else {
7294         __ orq(first_reg, second.AsRegister<CpuRegister>());
7295       }
7296     } else {
7297       DCHECK(instruction->IsXor());
7298       if (second_is_constant) {
7299         if (is_int32_value) {
7300           __ xorq(first_reg, Immediate(static_cast<int32_t>(value)));
7301         } else {
7302           __ xorq(first_reg, codegen_->LiteralInt64Address(value));
7303         }
7304       } else if (second.IsDoubleStackSlot()) {
7305         __ xorq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
7306       } else {
7307         __ xorq(first_reg, second.AsRegister<CpuRegister>());
7308       }
7309     }
7310   }
7311 }
7312 
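// Loads a heap reference from `*(out + offset)` into `out` itself. For a non-Baker read
// barrier, the original value of `out` is first saved into `maybe_temp`, because the slow
// path still needs the holder object after `out` has been overwritten.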
7313 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(
7314     HInstruction* instruction,
7315     Location out,
7316     uint32_t offset,
7317     Location maybe_temp,
7318     ReadBarrierOption read_barrier_option) {
7319   CpuRegister out_reg = out.AsRegister<CpuRegister>();
7320   if (read_barrier_option == kWithReadBarrier) {
7321     CHECK(kEmitCompilerReadBarrier);
7322     if (kUseBakerReadBarrier) {
7323       // Load with fast path based Baker's read barrier.
7324       // /* HeapReference<Object> */ out = *(out + offset)
7325       codegen_->GenerateFieldLoadWithBakerReadBarrier(
7326           instruction, out, out_reg, offset, /* needs_null_check= */ false);
7327     } else {
7328       // Load with slow path based read barrier.
7329       // Save the value of `out` into `maybe_temp` before overwriting it
7330       // in the following move operation, as we will need it for the
7331       // read barrier below.
7332       DCHECK(maybe_temp.IsRegister()) << maybe_temp;
7333       __ movl(maybe_temp.AsRegister<CpuRegister>(), out_reg);
7334       // /* HeapReference<Object> */ out = *(out + offset)
7335       __ movl(out_reg, Address(out_reg, offset));
7336       codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
7337     }
7338   } else {
7339     // Plain load with no read barrier.
7340     // /* HeapReference<Object> */ out = *(out + offset)
7341     __ movl(out_reg, Address(out_reg, offset));
7342     __ MaybeUnpoisonHeapReference(out_reg);
7343   }
7344 }
7345 
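// Loads a heap reference from `*(obj + offset)` into `out`. Unlike the one-register variant
// above, `obj` is left intact, so no temporary is needed for the non-Baker slow path.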
7346 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(
7347     HInstruction* instruction,
7348     Location out,
7349     Location obj,
7350     uint32_t offset,
7351     ReadBarrierOption read_barrier_option) {
7352   CpuRegister out_reg = out.AsRegister<CpuRegister>();
7353   CpuRegister obj_reg = obj.AsRegister<CpuRegister>();
7354   if (read_barrier_option == kWithReadBarrier) {
7355     CHECK(kEmitCompilerReadBarrier);
7356     if (kUseBakerReadBarrier) {
7357       // Load with fast path based Baker's read barrier.
7358       // /* HeapReference<Object> */ out = *(obj + offset)
7359       codegen_->GenerateFieldLoadWithBakerReadBarrier(
7360           instruction, out, obj_reg, offset, /* needs_null_check= */ false);
7361     } else {
7362       // Load with slow path based read barrier.
7363       // /* HeapReference<Object> */ out = *(obj + offset)
7364       __ movl(out_reg, Address(obj_reg, offset));
7365       codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
7366     }
7367   } else {
7368     // Plain load with no read barrier.
7369     // /* HeapReference<Object> */ out = *(obj + offset)
7370     __ movl(out_reg, Address(obj_reg, offset));
7371     __ MaybeUnpoisonHeapReference(out_reg);
7372   }
7373 }
7374 
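// Loads a GC root from `address` into `root`. When `fixup_label` is non-null, it is bound
// immediately after the load (or lea) so the caller can later patch that instruction's
// displacement.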
7375 void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(
7376     HInstruction* instruction,
7377     Location root,
7378     const Address& address,
7379     Label* fixup_label,
7380     ReadBarrierOption read_barrier_option) {
7381   CpuRegister root_reg = root.AsRegister<CpuRegister>();
7382   if (read_barrier_option == kWithReadBarrier) {
7383     DCHECK(kEmitCompilerReadBarrier);
7384     if (kUseBakerReadBarrier) {
7385       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
7386       // Baker's read barriers are used:
7387       //
7388       //   root = obj.field;
7389       //   temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
7390       //   if (temp != null) {
7391       //     root = temp(root)
7392       //   }
7393 
7394       // /* GcRoot<mirror::Object> */ root = *address
7395       __ movl(root_reg, address);
7396       if (fixup_label != nullptr) {
7397         __ Bind(fixup_label);
7398       }
7399       static_assert(
7400           sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
7401           "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
7402           "have different sizes.");
7403       static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
7404                     "art::mirror::CompressedReference<mirror::Object> and int32_t "
7405                     "have different sizes.");
7406 
7407       // Slow path marking the GC root `root`.
7408       SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathX86_64(
7409           instruction, root, /* unpoison_ref_before_marking= */ false);
7410       codegen_->AddSlowPath(slow_path);
7411 
7412       // Test the `Thread::Current()->pReadBarrierMarkReg ## root.reg()` entrypoint.
7413       const int32_t entry_point_offset =
7414           Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(root.reg());
7415       __ gs()->cmpl(Address::Absolute(entry_point_offset, /* no_rip= */ true), Immediate(0));
7416       // The entrypoint is null when the GC is not marking.
7417       __ j(kNotEqual, slow_path->GetEntryLabel());
7418       __ Bind(slow_path->GetExitLabel());
7419     } else {
7420       // GC root loaded through a slow path for read barriers other
7421       // than Baker's.
7422       // /* GcRoot<mirror::Object>* */ root = address
7423       __ leaq(root_reg, address);
7424       if (fixup_label != nullptr) {
7425         __ Bind(fixup_label);
7426       }
7427       // /* mirror::Object* */ root = root->Read()
7428       codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
7429     }
7430   } else {
7431     // Plain GC root load with no read barrier.
7432     // /* GcRoot<mirror::Object> */ root = *address
7433     __ movl(root_reg, address);
7434     if (fixup_label != nullptr) {
7435       __ Bind(fixup_label);
7436     }
7437     // Note that GC roots are not affected by heap poisoning, thus we
7438     // do not have to unpoison `root_reg` here.
7439   }
7440 }
7441 
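// The field and array helpers below only compute the source Address (field offset, or
// data_offset + index * 4 for an array element) and defer to
// GenerateReferenceLoadWithBakerReadBarrier, which emits the actual fast path.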
7442 void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
7443                                                                 Location ref,
7444                                                                 CpuRegister obj,
7445                                                                 uint32_t offset,
7446                                                                 bool needs_null_check) {
7447   DCHECK(kEmitCompilerReadBarrier);
7448   DCHECK(kUseBakerReadBarrier);
7449 
7450   // /* HeapReference<Object> */ ref = *(obj + offset)
7451   Address src(obj, offset);
7452   GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
7453 }
7454 
7455 void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
7456                                                                 Location ref,
7457                                                                 CpuRegister obj,
7458                                                                 uint32_t data_offset,
7459                                                                 Location index,
7460                                                                 bool needs_null_check) {
7461   DCHECK(kEmitCompilerReadBarrier);
7462   DCHECK(kUseBakerReadBarrier);
7463 
7464   static_assert(
7465       sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
7466       "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
7467   // /* HeapReference<Object> */ ref =
7468   //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
7469   Address src = CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset);
7470   GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
7471 }
7472 
7473 void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
7474                                                                     Location ref,
7475                                                                     CpuRegister obj,
7476                                                                     const Address& src,
7477                                                                     bool needs_null_check,
7478                                                                     bool always_update_field,
7479                                                                     CpuRegister* temp1,
7480                                                                     CpuRegister* temp2) {
7481   DCHECK(kEmitCompilerReadBarrier);
7482   DCHECK(kUseBakerReadBarrier);
7483 
7484   // In slow path based read barriers, the read barrier call is
7485   // inserted after the original load. However, in fast path based
7486   // Baker's read barriers, we need to perform the load of
7487   // mirror::Object::monitor_ *before* the original reference load.
7488   // This load-load ordering is required by the read barrier.
7489   // The fast path/slow path (for Baker's algorithm) should look like:
7490   //
7491   //   uint32_t rb_state = LockWord(obj->monitor_).ReadBarrierState();
7492   //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
7493   //   HeapReference<Object> ref = *src;  // Original reference load.
7494   //   bool is_gray = (rb_state == ReadBarrier::GrayState());
7495   //   if (is_gray) {
7496   //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
7497   //   }
7498   //
7499   // Note: the original implementation in ReadBarrier::Barrier is
7500   // slightly more complex as:
7501   // - it implements the load-load fence using a data dependency on
7502   //   the high-bits of rb_state, which are expected to be all zeroes
7503   //   (we use CodeGeneratorX86_64::GenerateMemoryBarrier instead
7504   //   here, which is a no-op thanks to the x86-64 memory model);
7505   // - it performs additional checks that we do not do here for
7506   //   performance reasons.
7507 
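  // Roughly, the sequence emitted below is (pseudo-assembly sketch):
  //
  //   testb $gray_mask, gray_byte_offset(obj)  // also the implicit null check, if requested
  //   movl  src, ref                           // original reference load (flags preserved)
  //   jnz   mark_slow_path                     // branch if the gray bit was set
  //   <maybe unpoison ref>                     // heap poisoning only
  // exit: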
7508   CpuRegister ref_reg = ref.AsRegister<CpuRegister>();
7509   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
7510 
7511   // Given the numeric representation, it's enough to check the low bit of the rb_state.
7512   static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
7513   static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
7514   constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
7515   constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
7516   constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
7517 
7518   // if (rb_state == ReadBarrier::GrayState())
7519   //   ref = ReadBarrier::Mark(ref);
7520   // At this point, just do the "if" and make sure that flags are preserved until the branch.
7521   __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
7522   if (needs_null_check) {
7523     MaybeRecordImplicitNullCheck(instruction);
7524   }
7525 
7526   // Load fence to prevent load-load reordering.
7527   // Note that this is a no-op, thanks to the x86-64 memory model.
7528   GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
7529 
7530   // The actual reference load.
7531   // /* HeapReference<Object> */ ref = *src
7532   __ movl(ref_reg, src);  // Flags are unaffected.
7533 
7534   // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch.
7535   // Slow path marking the object `ref` when it is gray.
7536   SlowPathCode* slow_path;
7537   if (always_update_field) {
7538     DCHECK(temp1 != nullptr);
7539     DCHECK(temp2 != nullptr);
7540     slow_path = new (GetScopedAllocator()) ReadBarrierMarkAndUpdateFieldSlowPathX86_64(
7541         instruction, ref, obj, src, /* unpoison_ref_before_marking= */ true, *temp1, *temp2);
7542   } else {
7543     slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathX86_64(
7544         instruction, ref, /* unpoison_ref_before_marking= */ true);
7545   }
7546   AddSlowPath(slow_path);
7547 
7548   // We have done the "if" of the gray bit check above, now branch based on the flags.
7549   __ j(kNotZero, slow_path->GetEntryLabel());
7550 
7551   // Object* ref = ref_addr->AsMirrorPtr()
7552   __ MaybeUnpoisonHeapReference(ref_reg);
7553 
7554   __ Bind(slow_path->GetExitLabel());
7555 }
7556 
7557 void CodeGeneratorX86_64::GenerateReadBarrierSlow(HInstruction* instruction,
7558                                                   Location out,
7559                                                   Location ref,
7560                                                   Location obj,
7561                                                   uint32_t offset,
7562                                                   Location index) {
7563   DCHECK(kEmitCompilerReadBarrier);
7564 
7565   // Insert a slow path based read barrier *after* the reference load.
7566   //
7567   // If heap poisoning is enabled, the unpoisoning of the loaded
7568   // reference will be carried out by the runtime within the slow
7569   // path.
7570   //
7571   // Note that `ref` currently does not get unpoisoned (when heap
7572   // poisoning is enabled), which is alright as the `ref` argument is
7573   // not used by the artReadBarrierSlow entry point.
7574   //
7575   // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
7576   SlowPathCode* slow_path = new (GetScopedAllocator())
7577       ReadBarrierForHeapReferenceSlowPathX86_64(instruction, out, ref, obj, offset, index);
7578   AddSlowPath(slow_path);
7579 
7580   __ jmp(slow_path->GetEntryLabel());
7581   __ Bind(slow_path->GetExitLabel());
7582 }
7583 
7584 void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
7585                                                        Location out,
7586                                                        Location ref,
7587                                                        Location obj,
7588                                                        uint32_t offset,
7589                                                        Location index) {
7590   if (kEmitCompilerReadBarrier) {
7591     // Baker's read barriers shall be handled by the fast path
7592     // (CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier).
7593     DCHECK(!kUseBakerReadBarrier);
7594     // If heap poisoning is enabled, unpoisoning will be taken care of
7595     // by the runtime within the slow path.
7596     GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
7597   } else if (kPoisonHeapReferences) {
7598     __ UnpoisonHeapReference(out.AsRegister<CpuRegister>());
7599   }
7600 }
7601 
7602 void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
7603                                                          Location out,
7604                                                          Location root) {
7605   DCHECK(kEmitCompilerReadBarrier);
7606 
7607   // Insert a slow path based read barrier *after* the GC root load.
7608   //
7609   // Note that GC roots are not affected by heap poisoning, so we do
7610   // not need to do anything special for this here.
7611   SlowPathCode* slow_path =
7612       new (GetScopedAllocator()) ReadBarrierForRootSlowPathX86_64(instruction, out, root);
7613   AddSlowPath(slow_path);
7614 
7615   __ jmp(slow_path->GetEntryLabel());
7616   __ Bind(slow_path->GetExitLabel());
7617 }
7618 
7619 void LocationsBuilderX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
7620   // Nothing to do, this should be removed during prepare for register allocator.
7621   LOG(FATAL) << "Unreachable";
7622 }
7623 
7624 void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
7625   // Nothing to do, this should be removed during prepare for register allocator.
7626   LOG(FATAL) << "Unreachable";
7627 }
7628 
7629 // Simple implementation of packed switch - generate cascaded compare/jumps.
7630 void LocationsBuilderX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
7631   LocationSummary* locations =
7632       new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
7633   locations->SetInAt(0, Location::RequiresRegister());
7634   locations->AddTemp(Location::RequiresRegister());
7635   locations->AddTemp(Location::RequiresRegister());
7636 }
7637 
7638 void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
7639   int32_t lower_bound = switch_instr->GetStartValue();
7640   uint32_t num_entries = switch_instr->GetNumEntries();
7641   LocationSummary* locations = switch_instr->GetLocations();
7642   CpuRegister value_reg_in = locations->InAt(0).AsRegister<CpuRegister>();
7643   CpuRegister temp_reg = locations->GetTemp(0).AsRegister<CpuRegister>();
7644   CpuRegister base_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
7645   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
7646 
7647   // Should we generate smaller inline compare/jumps?
7648   if (num_entries <= kPackedSwitchJumpTableThreshold) {
7649     // Figure out the correct compare values and jump conditions.
7650     // Handle the first compare/branch as a special case because it might
7651     // jump to the default case.
7652     DCHECK_GT(num_entries, 2u);
7653     Condition first_condition;
7654     uint32_t index;
7655     const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
7656     if (lower_bound != 0) {
7657       first_condition = kLess;
7658       __ cmpl(value_reg_in, Immediate(lower_bound));
7659       __ j(first_condition, codegen_->GetLabelOf(default_block));
7660       __ j(kEqual, codegen_->GetLabelOf(successors[0]));
7661 
7662       index = 1;
7663     } else {
7664       // Handle all the compare/jumps below.
7665       first_condition = kBelow;
7666       index = 0;
7667     }
7668 
7669     // Handle the rest of the compare/jumps.
7670     for (; index + 1 < num_entries; index += 2) {
7671       int32_t compare_to_value = lower_bound + index + 1;
7672       __ cmpl(value_reg_in, Immediate(compare_to_value));
7673       // Jump to successors[index] if value < case_value[index].
7674       __ j(first_condition, codegen_->GetLabelOf(successors[index]));
7675       // Jump to successors[index + 1] if value == case_value[index + 1].
7676       __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
7677     }
7678 
7679     if (index != num_entries) {
7680       // There are an odd number of entries. Handle the last one.
7681       DCHECK_EQ(index + 1, num_entries);
7682       __ cmpl(value_reg_in, Immediate(static_cast<int32_t>(lower_bound + index)));
7683       __ j(kEqual, codegen_->GetLabelOf(successors[index]));
7684     }
7685 
7686     // And the default for any other value.
7687     if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
7688       __ jmp(codegen_->GetLabelOf(default_block));
7689     }
7690     return;
7691   }
7692 
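  // Otherwise, dispatch through a jump table: bias the value by lower_bound, bounds-check
  // it against num_entries, load the signed 32-bit offset from the table in the constant
  // area, and jump to the table base plus that offset.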
7693   // Remove the bias, if needed.
7694   Register value_reg_out = value_reg_in.AsRegister();
7695   if (lower_bound != 0) {
7696     __ leal(temp_reg, Address(value_reg_in, -lower_bound));
7697     value_reg_out = temp_reg.AsRegister();
7698   }
7699   CpuRegister value_reg(value_reg_out);
7700 
7701   // Is the value in range?
7702   __ cmpl(value_reg, Immediate(num_entries - 1));
7703   __ j(kAbove, codegen_->GetLabelOf(default_block));
7704 
7705   // We are in the range of the table.
7706   // Load the address of the jump table in the constant area.
7707   __ leaq(base_reg, codegen_->LiteralCaseTable(switch_instr));
7708 
7709   // Load the (signed) offset from the jump table.
7710   __ movsxd(temp_reg, Address(base_reg, value_reg, TIMES_4, 0));
7711 
7712   // Add the offset to the address of the table base.
7713   __ addq(temp_reg, base_reg);
7714 
7715   // And jump.
7716   __ jmp(temp_reg);
7717 }
7718 
7719 void LocationsBuilderX86_64::VisitIntermediateAddress(HIntermediateAddress* instruction
7720                                                       ATTRIBUTE_UNUSED) {
7721   LOG(FATAL) << "Unreachable";
7722 }
7723 
7724 void InstructionCodeGeneratorX86_64::VisitIntermediateAddress(HIntermediateAddress* instruction
7725                                                               ATTRIBUTE_UNUSED) {
7726   LOG(FATAL) << "Unreachable";
7727 }
7728 
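// Constant materialization helpers. They prefer the shortest encoding: xor for zero, a
// 32-bit move when the value fits (it zero-extends on x86-64), and otherwise a full 64-bit
// immediate (for CPU registers) or a load from the constant area (for XMM registers).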
7729 void CodeGeneratorX86_64::Load32BitValue(CpuRegister dest, int32_t value) {
7730   if (value == 0) {
7731     __ xorl(dest, dest);
7732   } else {
7733     __ movl(dest, Immediate(value));
7734   }
7735 }
7736 
7737 void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) {
7738   if (value == 0) {
7739     // Clears upper bits too.
7740     __ xorl(dest, dest);
7741   } else if (IsUint<32>(value)) {
7742     // We can use a 32 bit move, as it will zero-extend and is shorter.
7743     __ movl(dest, Immediate(static_cast<int32_t>(value)));
7744   } else {
7745     __ movq(dest, Immediate(value));
7746   }
7747 }
7748 
7749 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, int32_t value) {
7750   if (value == 0) {
7751     __ xorps(dest, dest);
7752   } else {
7753     __ movss(dest, LiteralInt32Address(value));
7754   }
7755 }
7756 
7757 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, int64_t value) {
7758   if (value == 0) {
7759     __ xorpd(dest, dest);
7760   } else {
7761     __ movsd(dest, LiteralInt64Address(value));
7762   }
7763 }
7764 
7765 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, float value) {
7766   Load32BitValue(dest, bit_cast<int32_t, float>(value));
7767 }
7768 
7769 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, double value) {
7770   Load64BitValue(dest, bit_cast<int64_t, double>(value));
7771 }
7772 
7773 void CodeGeneratorX86_64::Compare32BitValue(CpuRegister dest, int32_t value) {
7774   if (value == 0) {
7775     __ testl(dest, dest);
7776   } else {
7777     __ cmpl(dest, Immediate(value));
7778   }
7779 }
7780 
7781 void CodeGeneratorX86_64::Compare64BitValue(CpuRegister dest, int64_t value) {
7782   if (IsInt<32>(value)) {
7783     if (value == 0) {
7784       __ testq(dest, dest);
7785     } else {
7786       __ cmpq(dest, Immediate(static_cast<int32_t>(value)));
7787     }
7788   } else {
7789     // Value won't fit in a 32-bit immediate.
7790     __ cmpq(dest, LiteralInt64Address(value));
7791   }
7792 }
7793 
7794 void CodeGeneratorX86_64::GenerateIntCompare(Location lhs, Location rhs) {
7795   CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>();
7796   GenerateIntCompare(lhs_reg, rhs);
7797 }
7798 
7799 void CodeGeneratorX86_64::GenerateIntCompare(CpuRegister lhs, Location rhs) {
7800   if (rhs.IsConstant()) {
7801     int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
7802     Compare32BitValue(lhs, value);
7803   } else if (rhs.IsStackSlot()) {
7804     __ cmpl(lhs, Address(CpuRegister(RSP), rhs.GetStackIndex()));
7805   } else {
7806     __ cmpl(lhs, rhs.AsRegister<CpuRegister>());
7807   }
7808 }
7809 
7810 void CodeGeneratorX86_64::GenerateLongCompare(Location lhs, Location rhs) {
7811   CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>();
7812   if (rhs.IsConstant()) {
7813     int64_t value = rhs.GetConstant()->AsLongConstant()->GetValue();
7814     Compare64BitValue(lhs_reg, value);
7815   } else if (rhs.IsDoubleStackSlot()) {
7816     __ cmpq(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
7817   } else {
7818     __ cmpq(lhs_reg, rhs.AsRegister<CpuRegister>());
7819   }
7820 }
7821 
7822 Address CodeGeneratorX86_64::ArrayAddress(CpuRegister obj,
7823                                           Location index,
7824                                           ScaleFactor scale,
7825                                           uint32_t data_offset) {
7826   return index.IsConstant() ?
7827       Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) :
7828       Address(obj, index.AsRegister<CpuRegister>(), scale, data_offset);
7829 }
7830 
7831 void CodeGeneratorX86_64::Store64BitValueToStack(Location dest, int64_t value) {
7832   DCHECK(dest.IsDoubleStackSlot());
7833   if (IsInt<32>(value)) {
7834     // Can move directly as an int32 constant.
7835     __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()),
7836             Immediate(static_cast<int32_t>(value)));
7837   } else {
7838     Load64BitValue(CpuRegister(TMP), value);
7839     __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()), CpuRegister(TMP));
7840   }
7841 }
7842 
7843 /**
7844  * Class to handle late fixup of offsets into the constant area.
7845  */
7846 class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
7847  public:
7848   RIPFixup(CodeGeneratorX86_64& codegen, size_t offset)
7849       : codegen_(&codegen), offset_into_constant_area_(offset) {}
7850 
7851  protected:
7852   void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }
7853 
7854   CodeGeneratorX86_64* codegen_;
7855 
7856  private:
7857   void Process(const MemoryRegion& region, int pos) override {
7858     // Patch the correct offset for the instruction.  We use the address of the
7859     // 'next' instruction, which is 'pos' (patch the 4 bytes before).
7860     int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
7861     int32_t relative_position = constant_offset - pos;
7862 
7863     // Patch in the right value.
7864     region.StoreUnaligned<int32_t>(pos - 4, relative_position);
7865   }
7866 
7867   // Location in constant area that the fixup refers to.
7868   size_t offset_into_constant_area_;
7869 };
7870 
7871 /**
7872  * Class to handle late fixup of offsets to a jump table that will be created in the
7873  * constant area.
7874  */
7875 class JumpTableRIPFixup : public RIPFixup {
7876  public:
7877   JumpTableRIPFixup(CodeGeneratorX86_64& codegen, HPackedSwitch* switch_instr)
7878       : RIPFixup(codegen, -1), switch_instr_(switch_instr) {}
7879 
7880   void CreateJumpTable() {
7881     X86_64Assembler* assembler = codegen_->GetAssembler();
7882 
7883     // Ensure that the reference to the jump table has the correct offset.
7884     const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
7885     SetOffset(offset_in_constant_table);
7886 
7887     // Compute the offset from the start of the function to this jump table.
7888     const int32_t current_table_offset = assembler->CodeSize() + offset_in_constant_table;
7889 
7890     // Populate the jump table with the offsets to the case target blocks.
7891     int32_t num_entries = switch_instr_->GetNumEntries();
7892     HBasicBlock* block = switch_instr_->GetBlock();
7893     const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
7894     // The value that we want is the target offset - the position of the table.
7895     for (int32_t i = 0; i < num_entries; i++) {
7896       HBasicBlock* b = successors[i];
7897       Label* l = codegen_->GetLabelOf(b);
7898       DCHECK(l->IsBound());
7899       int32_t offset_to_block = l->Position() - current_table_offset;
7900       assembler->AppendInt32(offset_to_block);
7901     }
7902   }
7903 
7904  private:
7905   const HPackedSwitch* switch_instr_;
7906 };
7907 
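// Finalization emits the constant area (literals and jump tables) right after the method
// code, so the RIP-relative fixups recorded for it can be resolved.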
7908 void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) {
7909   // Generate the constant area if needed.
7910   X86_64Assembler* assembler = GetAssembler();
7911   if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
7912     // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8 byte values.
7913     assembler->Align(4, 0);
7914     constant_area_start_ = assembler->CodeSize();
7915 
7916     // Populate any jump tables.
7917     for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) {
7918       jump_table->CreateJumpTable();
7919     }
7920 
7921     // And now add the constant area to the generated code.
7922     assembler->AddConstantArea();
7923   }
7924 
7925   // And finish up.
7926   CodeGenerator::Finalize(allocator);
7927 }
7928 
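// The Literal*Address helpers return RIP-relative addresses into the constant area; the
// actual displacement is filled in late by a RIPFixup once the constant area's position is
// known (see Finalize above).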
7929 Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) {
7930   AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddDouble(v));
7931   return Address::RIP(fixup);
7932 }
7933 
7934 Address CodeGeneratorX86_64::LiteralFloatAddress(float v) {
7935   AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddFloat(v));
7936   return Address::RIP(fixup);
7937 }
7938 
7939 Address CodeGeneratorX86_64::LiteralInt32Address(int32_t v) {
7940   AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddInt32(v));
7941   return Address::RIP(fixup);
7942 }
7943 
7944 Address CodeGeneratorX86_64::LiteralInt64Address(int64_t v) {
7945   AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddInt64(v));
7946   return Address::RIP(fixup);
7947 }
7948 
7949 // TODO: trg as memory.
7950 void CodeGeneratorX86_64::MoveFromReturnRegister(Location trg, DataType::Type type) {
7951   if (!trg.IsValid()) {
7952     DCHECK_EQ(type, DataType::Type::kVoid);
7953     return;
7954   }
7955 
7956   DCHECK_NE(type, DataType::Type::kVoid);
7957 
7958   Location return_loc = InvokeDexCallingConventionVisitorX86_64().GetReturnLocation(type);
7959   if (trg.Equals(return_loc)) {
7960     return;
7961   }
7962 
7963   // Let the parallel move resolver take care of all of this.
7964   HParallelMove parallel_move(GetGraph()->GetAllocator());
7965   parallel_move.AddMove(return_loc, trg, type, nullptr);
7966   GetMoveResolver()->EmitNativeCode(&parallel_move);
7967 }
7968 
7969 Address CodeGeneratorX86_64::LiteralCaseTable(HPackedSwitch* switch_instr) {
7970   // Create a fixup that will later materialize the jump table and provide its address.
7971   JumpTableRIPFixup* table_fixup =
7972       new (GetGraph()->GetAllocator()) JumpTableRIPFixup(*this, switch_instr);
7973 
7974   // Remember it so the jump table can be populated during Finalize().
7975   fixups_to_jump_tables_.push_back(table_fixup);
7976   return Address::RIP(table_fixup);
7977 }
7978 
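// Stores a 64-bit immediate to memory. A value that fits in 32 bits uses a single movq with
// a sign-extended immediate; otherwise the value is written as two 32-bit halves, with the
// implicit null check recorded on the first store only.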
7979 void CodeGeneratorX86_64::MoveInt64ToAddress(const Address& addr_low,
7980                                              const Address& addr_high,
7981                                              int64_t v,
7982                                              HInstruction* instruction) {
7983   if (IsInt<32>(v)) {
7984     int32_t v_32 = v;
7985     __ movq(addr_low, Immediate(v_32));
7986     MaybeRecordImplicitNullCheck(instruction);
7987   } else {
7988     // Didn't fit in a 32-bit immediate.  Store it in two 32-bit pieces.
7989     int32_t low_v = Low32Bits(v);
7990     int32_t high_v = High32Bits(v);
7991     __ movl(addr_low, Immediate(low_v));
7992     MaybeRecordImplicitNullCheck(instruction);
7993     __ movl(addr_high, Immediate(high_v));
7994   }
7995 }
7996 
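// Writes the 32-bit address of the JIT GC root's entry in `roots_data` into the code, at
// the immediate position derived from the recorded patch label.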
7997 void CodeGeneratorX86_64::PatchJitRootUse(uint8_t* code,
7998                                           const uint8_t* roots_data,
7999                                           const PatchInfo<Label>& info,
8000                                           uint64_t index_in_table) const {
8001   uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
8002   uintptr_t address =
8003       reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
8004   using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t;
8005   reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
8006      dchecked_integral_cast<uint32_t>(address);
8007 }
8008 
8009 void CodeGeneratorX86_64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
8010   for (const PatchInfo<Label>& info : jit_string_patches_) {
8011     StringReference string_reference(info.target_dex_file, dex::StringIndex(info.offset_or_index));
8012     uint64_t index_in_table = GetJitStringRootIndex(string_reference);
8013     PatchJitRootUse(code, roots_data, info, index_in_table);
8014   }
8015 
8016   for (const PatchInfo<Label>& info : jit_class_patches_) {
8017     TypeReference type_reference(info.target_dex_file, dex::TypeIndex(info.offset_or_index));
8018     uint64_t index_in_table = GetJitClassRootIndex(type_reference);
8019     PatchJitRootUse(code, roots_data, info, index_in_table);
8020   }
8021 }
8022 
8023 bool LocationsBuilderX86_64::CpuHasAvxFeatureFlag() {
8024   return codegen_->GetInstructionSetFeatures().HasAVX();
8025 }
8026 
8027 bool LocationsBuilderX86_64::CpuHasAvx2FeatureFlag() {
8028   return codegen_->GetInstructionSetFeatures().HasAVX2();
8029 }
8030 
8031 bool InstructionCodeGeneratorX86_64::CpuHasAvxFeatureFlag() {
8032   return codegen_->GetInstructionSetFeatures().HasAVX();
8033 }
8034 
8035 bool InstructionCodeGeneratorX86_64::CpuHasAvx2FeatureFlag() {
8036   return codegen_->GetInstructionSetFeatures().HasAVX2();
8037 }
8038 
8039 #undef __
8040 
8041 }  // namespace x86_64
8042 }  // namespace art
8043