1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "code_generator_x86_64.h"
18 
19 #include "arch/x86_64/jni_frame_x86_64.h"
20 #include "art_method-inl.h"
21 #include "class_root-inl.h"
22 #include "class_table.h"
23 #include "code_generator_utils.h"
24 #include "entrypoints/quick/quick_entrypoints.h"
25 #include "gc/accounting/card_table.h"
26 #include "gc/space/image_space.h"
27 #include "heap_poisoning.h"
28 #include "interpreter/mterp/nterp.h"
29 #include "intrinsics.h"
30 #include "intrinsics_list.h"
31 #include "intrinsics_utils.h"
32 #include "intrinsics_x86_64.h"
33 #include "jit/profiling_info.h"
34 #include "linker/linker_patch.h"
35 #include "lock_word.h"
36 #include "mirror/array-inl.h"
37 #include "mirror/class-inl.h"
38 #include "mirror/object_reference.h"
39 #include "mirror/var_handle.h"
40 #include "optimizing/nodes.h"
41 #include "profiling_info_builder.h"
42 #include "scoped_thread_state_change-inl.h"
43 #include "thread.h"
44 #include "trace.h"
45 #include "utils/assembler.h"
46 #include "utils/stack_checks.h"
47 #include "utils/x86_64/assembler_x86_64.h"
48 #include "utils/x86_64/constants_x86_64.h"
49 #include "utils/x86_64/managed_register_x86_64.h"
50 
51 namespace art HIDDEN {
52 
53 template<class MirrorType>
54 class GcRoot;
55 
56 namespace x86_64 {
57 
58 static constexpr int kCurrentMethodStackOffset = 0;
59 static constexpr Register kMethodRegisterArgument = RDI;
60 // The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump
61 // table version generates 7 instructions and num_entries literals. Compare/jump sequence will
62 // generate less code/data with a small num_entries.
63 static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;
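// For example, at the threshold of 5 entries the compare/jump sequence costs about
// 8 instructions (1.5 * 5), versus 7 instructions plus 5 table literals for a jump table.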
64 
65 static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 };
66 static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 };
67 
68 static constexpr int kC2ConditionMask = 0x400;
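// Note: 0x400 is bit 10 of the x87 FPU status word, i.e. the C2 condition flag.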
69 
70 static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
71   // Custom calling convention: RAX serves as both input and output.
72   RegisterSet caller_saves = RegisterSet::Empty();
73   caller_saves.Add(Location::RegisterLocation(RAX));
74   return caller_saves;
75 }
76 
77 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
78 #define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())->  // NOLINT
79 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize, x).Int32Value()
80 
81 class NullCheckSlowPathX86_64 : public SlowPathCode {
82  public:
83   explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : SlowPathCode(instruction) {}
84 
85   void EmitNativeCode(CodeGenerator* codegen) override {
86     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
87     __ Bind(GetEntryLabel());
88     if (instruction_->CanThrowIntoCatchBlock()) {
89       // Live registers will be restored in the catch block if caught.
90       SaveLiveRegisters(codegen, instruction_->GetLocations());
91     }
92     x86_64_codegen->InvokeRuntime(kQuickThrowNullPointer,
93                                   instruction_,
94                                   instruction_->GetDexPc(),
95                                   this);
96     CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
97   }
98 
99   bool IsFatal() const override { return true; }
100 
101   const char* GetDescription() const override { return "NullCheckSlowPathX86_64"; }
102 
103  private:
104   DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86_64);
105 };
106 
107 class DivZeroCheckSlowPathX86_64 : public SlowPathCode {
108  public:
109   explicit DivZeroCheckSlowPathX86_64(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}
110 
111   void EmitNativeCode(CodeGenerator* codegen) override {
112     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
113     __ Bind(GetEntryLabel());
114     x86_64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
115     CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
116   }
117 
118   bool IsFatal() const override { return true; }
119 
120   const char* GetDescription() const override { return "DivZeroCheckSlowPathX86_64"; }
121 
122  private:
123   DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86_64);
124 };
125 
126 class DivRemMinusOneSlowPathX86_64 : public SlowPathCode {
127  public:
128   DivRemMinusOneSlowPathX86_64(HInstruction* at, Register reg, DataType::Type type, bool is_div)
129       : SlowPathCode(at), cpu_reg_(CpuRegister(reg)), type_(type), is_div_(is_div) {}
130 
131   void EmitNativeCode(CodeGenerator* codegen) override {
132     __ Bind(GetEntryLabel());
133     if (type_ == DataType::Type::kInt32) {
134       if (is_div_) {
135         __ negl(cpu_reg_);
136       } else {
137         __ xorl(cpu_reg_, cpu_reg_);
138       }
139 
140     } else {
141       DCHECK_EQ(DataType::Type::kInt64, type_);
142       if (is_div_) {
143         __ negq(cpu_reg_);
144       } else {
145         __ xorl(cpu_reg_, cpu_reg_);
146       }
147     }
148     __ jmp(GetExitLabel());
149   }
150 
151   const char* GetDescription() const override { return "DivRemMinusOneSlowPathX86_64"; }
152 
153  private:
154   const CpuRegister cpu_reg_;
155   const DataType::Type type_;
156   const bool is_div_;
157   DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86_64);
158 };
159 
160 class SuspendCheckSlowPathX86_64 : public SlowPathCode {
161  public:
162   SuspendCheckSlowPathX86_64(HSuspendCheck* instruction, HBasicBlock* successor)
163       : SlowPathCode(instruction), successor_(successor) {}
164 
165   void EmitNativeCode(CodeGenerator* codegen) override {
166     LocationSummary* locations = instruction_->GetLocations();
167     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
168     __ Bind(GetEntryLabel());
169     SaveLiveRegisters(codegen, locations);  // Only saves full width XMM for SIMD.
170     x86_64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
171     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
172     RestoreLiveRegisters(codegen, locations);  // Only restores full width XMM for SIMD.
173     if (successor_ == nullptr) {
174       __ jmp(GetReturnLabel());
175     } else {
176       __ jmp(x86_64_codegen->GetLabelOf(successor_));
177     }
178   }
179 
180   Label* GetReturnLabel() {
181     DCHECK(successor_ == nullptr);
182     return &return_label_;
183   }
184 
185   HBasicBlock* GetSuccessor() const {
186     return successor_;
187   }
188 
189   const char* GetDescription() const override { return "SuspendCheckSlowPathX86_64"; }
190 
191  private:
192   HBasicBlock* const successor_;
193   Label return_label_;
194 
195   DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86_64);
196 };
197 
198 class BoundsCheckSlowPathX86_64 : public SlowPathCode {
199  public:
200   explicit BoundsCheckSlowPathX86_64(HBoundsCheck* instruction)
201     : SlowPathCode(instruction) {}
202 
203   void EmitNativeCode(CodeGenerator* codegen) override {
204     LocationSummary* locations = instruction_->GetLocations();
205     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
206     __ Bind(GetEntryLabel());
207     if (instruction_->CanThrowIntoCatchBlock()) {
208       // Live registers will be restored in the catch block if caught.
209       SaveLiveRegisters(codegen, locations);
210     }
211 
212     Location index_loc = locations->InAt(0);
213     Location length_loc = locations->InAt(1);
214     InvokeRuntimeCallingConvention calling_convention;
215     Location index_arg = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
216     Location length_arg = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
217 
218     // Are we using an array length from memory?
219     if (!length_loc.IsValid()) {
220       DCHECK(instruction_->InputAt(1)->IsArrayLength());
221       HArrayLength* array_length = instruction_->InputAt(1)->AsArrayLength();
222       DCHECK(array_length->IsEmittedAtUseSite());
223       uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length);
224       Location array_loc = array_length->GetLocations()->InAt(0);
225       Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
226       if (!index_loc.Equals(length_arg)) {
227         // The index is not clobbered by loading the length directly to `length_arg`.
228         __ movl(length_arg.AsRegister<CpuRegister>(), array_len);
229         x86_64_codegen->Move(index_arg, index_loc);
230       } else if (!array_loc.Equals(index_arg)) {
231         // The array reference is not clobbered by the index move.
232         x86_64_codegen->Move(index_arg, index_loc);
233         __ movl(length_arg.AsRegister<CpuRegister>(), array_len);
234       } else {
235         // Load the array length into `TMP`.
236         DCHECK(codegen->IsBlockedCoreRegister(TMP));
237         __ movl(CpuRegister(TMP), array_len);
238         // Single move to CPU register does not clobber `TMP`.
239         x86_64_codegen->Move(index_arg, index_loc);
240         __ movl(length_arg.AsRegister<CpuRegister>(), CpuRegister(TMP));
241       }
242       if (mirror::kUseStringCompression && array_length->IsStringLength()) {
243         __ shrl(length_arg.AsRegister<CpuRegister>(), Immediate(1));
244       }
245     } else {
246       // We're moving two locations to locations that could overlap,
247       // so we need a parallel move resolver.
248       codegen->EmitParallelMoves(
249           index_loc,
250           index_arg,
251           DataType::Type::kInt32,
252           length_loc,
253           length_arg,
254           DataType::Type::kInt32);
255     }
256 
257     QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
258         ? kQuickThrowStringBounds
259         : kQuickThrowArrayBounds;
260     x86_64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
261     CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
262     CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
263   }
264 
265   bool IsFatal() const override { return true; }
266 
267   const char* GetDescription() const override { return "BoundsCheckSlowPathX86_64"; }
268 
269  private:
270   DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86_64);
271 };
272 
273 class LoadMethodTypeSlowPathX86_64: public SlowPathCode {
274  public:
275   explicit LoadMethodTypeSlowPathX86_64(HLoadMethodType* mt) : SlowPathCode(mt) {}
276 
277   void EmitNativeCode(CodeGenerator* codegen) override {
278     LocationSummary* locations = instruction_->GetLocations();
279     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
280 
281     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
282     __ Bind(GetEntryLabel());
283     SaveLiveRegisters(codegen, locations);
284 
285     const dex::ProtoIndex proto_index = instruction_->AsLoadMethodType()->GetProtoIndex();
286     // Custom calling convention: RAX serves as both input and output.
287     __ movl(CpuRegister(RAX), Immediate(proto_index.index_));
288     x86_64_codegen->InvokeRuntime(kQuickResolveMethodType,
289                                   instruction_,
290                                   instruction_->GetDexPc(),
291                                   this);
292     CheckEntrypointTypes<kQuickResolveMethodType, void*, uint32_t>();
293     x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
294     RestoreLiveRegisters(codegen, locations);
295 
296     __ jmp(GetExitLabel());
297   }
298 
299   const char* GetDescription() const override { return "LoadMethodTypeSlowPathX86_64"; }
300 
301  private:
302   DISALLOW_COPY_AND_ASSIGN(LoadMethodTypeSlowPathX86_64);
303 };
304 
305 class LoadClassSlowPathX86_64 : public SlowPathCode {
306  public:
307   LoadClassSlowPathX86_64(HLoadClass* cls, HInstruction* at)
308       : SlowPathCode(at), cls_(cls) {
309     DCHECK(at->IsLoadClass() || at->IsClinitCheck());
310     DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
311   }
312 
313   void EmitNativeCode(CodeGenerator* codegen) override {
314     LocationSummary* locations = instruction_->GetLocations();
315     Location out = locations->Out();
316     const uint32_t dex_pc = instruction_->GetDexPc();
317     bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
318     bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();
319 
320     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
321     __ Bind(GetEntryLabel());
322     SaveLiveRegisters(codegen, locations);
323 
324     // Custom calling convention: RAX serves as both input and output.
325     if (must_resolve_type) {
326       DCHECK(IsSameDexFile(cls_->GetDexFile(), x86_64_codegen->GetGraph()->GetDexFile()) ||
327              x86_64_codegen->GetCompilerOptions().WithinOatFile(&cls_->GetDexFile()) ||
328              ContainsElement(Runtime::Current()->GetClassLinker()->GetBootClassPath(),
329                              &cls_->GetDexFile()));
330       dex::TypeIndex type_index = cls_->GetTypeIndex();
331       __ movl(CpuRegister(RAX), Immediate(type_index.index_));
332       if (cls_->NeedsAccessCheck()) {
333         CheckEntrypointTypes<kQuickResolveTypeAndVerifyAccess, void*, uint32_t>();
334         x86_64_codegen->InvokeRuntime(kQuickResolveTypeAndVerifyAccess, instruction_, dex_pc, this);
335       } else {
336         CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
337         x86_64_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
338       }
339       // If we also must_do_clinit, the resolved type is now in the correct register.
340     } else {
341       DCHECK(must_do_clinit);
342       Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
343       x86_64_codegen->Move(Location::RegisterLocation(RAX), source);
344     }
345     if (must_do_clinit) {
346       x86_64_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
347       CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
348     }
349 
350     // Move the class to the desired location.
351     if (out.IsValid()) {
352       DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
353       x86_64_codegen->Move(out, Location::RegisterLocation(RAX));
354     }
355 
356     RestoreLiveRegisters(codegen, locations);
357     __ jmp(GetExitLabel());
358   }
359 
360   const char* GetDescription() const override { return "LoadClassSlowPathX86_64"; }
361 
362  private:
363   // The class this slow path will load.
364   HLoadClass* const cls_;
365 
366   DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86_64);
367 };
368 
369 class LoadStringSlowPathX86_64 : public SlowPathCode {
370  public:
371   explicit LoadStringSlowPathX86_64(HLoadString* instruction) : SlowPathCode(instruction) {}
372 
373   void EmitNativeCode(CodeGenerator* codegen) override {
374     LocationSummary* locations = instruction_->GetLocations();
375     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
376 
377     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
378     __ Bind(GetEntryLabel());
379     SaveLiveRegisters(codegen, locations);
380 
381     const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
382     // Custom calling convention: RAX serves as both input and output.
383     __ movl(CpuRegister(RAX), Immediate(string_index.index_));
384     x86_64_codegen->InvokeRuntime(kQuickResolveString,
385                                   instruction_,
386                                   instruction_->GetDexPc(),
387                                   this);
388     CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
389     x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
390     RestoreLiveRegisters(codegen, locations);
391 
392     __ jmp(GetExitLabel());
393   }
394 
395   const char* GetDescription() const override { return "LoadStringSlowPathX86_64"; }
396 
397  private:
398   DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86_64);
399 };
400 
401 class TypeCheckSlowPathX86_64 : public SlowPathCode {
402  public:
403   TypeCheckSlowPathX86_64(HInstruction* instruction, bool is_fatal)
404       : SlowPathCode(instruction), is_fatal_(is_fatal) {}
405 
406   void EmitNativeCode(CodeGenerator* codegen) override {
407     LocationSummary* locations = instruction_->GetLocations();
408     uint32_t dex_pc = instruction_->GetDexPc();
409     DCHECK(instruction_->IsCheckCast()
410            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
411 
412     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
413     __ Bind(GetEntryLabel());
414 
415     if (kPoisonHeapReferences &&
416         instruction_->IsCheckCast() &&
417         instruction_->AsCheckCast()->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) {
418       // First, unpoison the `cls` reference that was poisoned for direct memory comparison.
419       __ UnpoisonHeapReference(locations->InAt(1).AsRegister<CpuRegister>());
420     }
421 
422     if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
423       SaveLiveRegisters(codegen, locations);
424     }
425 
426     // We're moving two locations to locations that could overlap, so we need a parallel
427     // move resolver.
428     InvokeRuntimeCallingConvention calling_convention;
429     codegen->EmitParallelMoves(locations->InAt(0),
430                                Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
431                                DataType::Type::kReference,
432                                locations->InAt(1),
433                                Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
434                                DataType::Type::kReference);
435     if (instruction_->IsInstanceOf()) {
436       x86_64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
437       CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
438     } else {
439       DCHECK(instruction_->IsCheckCast());
440       x86_64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
441       CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
442     }
443 
444     if (!is_fatal_) {
445       if (instruction_->IsInstanceOf()) {
446         x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
447       }
448 
449       RestoreLiveRegisters(codegen, locations);
450       __ jmp(GetExitLabel());
451     }
452   }
453 
454   const char* GetDescription() const override { return "TypeCheckSlowPathX86_64"; }
455 
456   bool IsFatal() const override { return is_fatal_; }
457 
458  private:
459   const bool is_fatal_;
460 
461   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86_64);
462 };
463 
464 class DeoptimizationSlowPathX86_64 : public SlowPathCode {
465  public:
466   explicit DeoptimizationSlowPathX86_64(HDeoptimize* instruction)
467       : SlowPathCode(instruction) {}
468 
469   void EmitNativeCode(CodeGenerator* codegen) override {
470     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
471     __ Bind(GetEntryLabel());
472     LocationSummary* locations = instruction_->GetLocations();
473     SaveLiveRegisters(codegen, locations);
474     InvokeRuntimeCallingConvention calling_convention;
475     x86_64_codegen->Load32BitValue(
476         CpuRegister(calling_convention.GetRegisterAt(0)),
477         static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
478     x86_64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
479     CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
480   }
481 
482   const char* GetDescription() const override { return "DeoptimizationSlowPathX86_64"; }
483 
484  private:
485   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64);
486 };
487 
488 class ArraySetSlowPathX86_64 : public SlowPathCode {
489  public:
490   explicit ArraySetSlowPathX86_64(HInstruction* instruction) : SlowPathCode(instruction) {}
491 
492   void EmitNativeCode(CodeGenerator* codegen) override {
493     LocationSummary* locations = instruction_->GetLocations();
494     __ Bind(GetEntryLabel());
495     SaveLiveRegisters(codegen, locations);
496 
497     InvokeRuntimeCallingConvention calling_convention;
498     HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
499     parallel_move.AddMove(
500         locations->InAt(0),
501         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
502         DataType::Type::kReference,
503         nullptr);
504     parallel_move.AddMove(
505         locations->InAt(1),
506         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
507         DataType::Type::kInt32,
508         nullptr);
509     parallel_move.AddMove(
510         locations->InAt(2),
511         Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
512         DataType::Type::kReference,
513         nullptr);
514     codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
515 
516     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
517     x86_64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
518     CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
519     RestoreLiveRegisters(codegen, locations);
520     __ jmp(GetExitLabel());
521   }
522 
523   const char* GetDescription() const override { return "ArraySetSlowPathX86_64"; }
524 
525  private:
526   DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64);
527 };
528 
529 // Slow path marking an object reference `ref` during a read
530 // barrier. The field `obj.field` in the object `obj` holding this
531 // reference does not get updated by this slow path after marking (see
532 // ReadBarrierMarkAndUpdateFieldSlowPathX86_64 below for that).
533 //
534 // This means that after the execution of this slow path, `ref` will
535 // always be up-to-date, but `obj.field` may not; i.e., after the
536 // flip, `ref` will be a to-space reference, but `obj.field` will
537 // probably still be a from-space reference (unless it gets updated by
538 // another thread, or if another thread installed another object
539 // reference (different from `ref`) in `obj.field`).
540 class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
541  public:
542   ReadBarrierMarkSlowPathX86_64(HInstruction* instruction,
543                                 Location ref,
544                                 bool unpoison_ref_before_marking)
545       : SlowPathCode(instruction),
546         ref_(ref),
547         unpoison_ref_before_marking_(unpoison_ref_before_marking) {
548   }
549 
550   const char* GetDescription() const override { return "ReadBarrierMarkSlowPathX86_64"; }
551 
552   void EmitNativeCode(CodeGenerator* codegen) override {
553     DCHECK(codegen->EmitReadBarrier());
554     LocationSummary* locations = instruction_->GetLocations();
555     CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
556     Register ref_reg = ref_cpu_reg.AsRegister();
557     DCHECK(locations->CanCall());
558     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
559     DCHECK(instruction_->IsInstanceFieldGet() ||
560            instruction_->IsStaticFieldGet() ||
561            instruction_->IsArrayGet() ||
562            instruction_->IsArraySet() ||
563            instruction_->IsLoadClass() ||
564            instruction_->IsLoadMethodType() ||
565            instruction_->IsLoadString() ||
566            instruction_->IsInstanceOf() ||
567            instruction_->IsCheckCast() ||
568            (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
569         << "Unexpected instruction in read barrier marking slow path: "
570         << instruction_->DebugName();
571 
572     __ Bind(GetEntryLabel());
573     if (unpoison_ref_before_marking_) {
574       // Object* ref = ref_addr->AsMirrorPtr()
575       __ MaybeUnpoisonHeapReference(ref_cpu_reg);
576     }
577     // No need to save live registers; it's taken care of by the
578     // entrypoint. Also, there is no need to update the stack mask,
579     // as this runtime call will not trigger a garbage collection.
580     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
581     DCHECK_NE(ref_reg, RSP);
582     DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
583     // "Compact" slow path, saving two moves.
584     //
585     // Instead of using the standard runtime calling convention (input
586     // and output in R0):
587     //
588     //   RDI <- ref
589     //   RAX <- ReadBarrierMark(RDI)
590     //   ref <- RAX
591     //
592     // we just use rX (the register containing `ref`) as input and output
593     // of a dedicated entrypoint:
594     //
595     //   rX <- ReadBarrierMarkRegX(rX)
596     //
597     int32_t entry_point_offset =
598         Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
599     // This runtime call does not require a stack map.
600     x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
601     __ jmp(GetExitLabel());
602   }
603 
604  private:
605   // The location (register) of the marked object reference.
606   const Location ref_;
607   // Should the reference in `ref_` be unpoisoned prior to marking it?
608   const bool unpoison_ref_before_marking_;
609 
610   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64);
611 };
612 
613 // Slow path marking an object reference `ref` during a read barrier,
614 // and if needed, atomically updating the field `obj.field` in the
615 // object `obj` holding this reference after marking (contrary to
616 // ReadBarrierMarkSlowPathX86_64 above, which never tries to update
617 // `obj.field`).
618 //
619 // This means that after the execution of this slow path, both `ref`
620 // and `obj.field` will be up-to-date; i.e., after the flip, both will
621 // hold the same to-space reference (unless another thread installed
622 // another object reference (different from `ref`) in `obj.field`).
623 class ReadBarrierMarkAndUpdateFieldSlowPathX86_64 : public SlowPathCode {
624  public:
625   ReadBarrierMarkAndUpdateFieldSlowPathX86_64(HInstruction* instruction,
626                                               Location ref,
627                                               CpuRegister obj,
628                                               const Address& field_addr,
629                                               bool unpoison_ref_before_marking,
630                                               CpuRegister temp1,
631                                               CpuRegister temp2)
632       : SlowPathCode(instruction),
633         ref_(ref),
634         obj_(obj),
635         field_addr_(field_addr),
636         unpoison_ref_before_marking_(unpoison_ref_before_marking),
637         temp1_(temp1),
638         temp2_(temp2) {
639   }
640 
641   const char* GetDescription() const override {
642     return "ReadBarrierMarkAndUpdateFieldSlowPathX86_64";
643   }
644 
645   void EmitNativeCode(CodeGenerator* codegen) override {
646     DCHECK(codegen->EmitReadBarrier());
647     LocationSummary* locations = instruction_->GetLocations();
648     CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
649     Register ref_reg = ref_cpu_reg.AsRegister();
650     DCHECK(locations->CanCall());
651     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
652     DCHECK((instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
653         << "Unexpected instruction in read barrier marking and field updating slow path: "
654         << instruction_->DebugName();
655     HInvoke* invoke = instruction_->AsInvoke();
656     DCHECK(IsUnsafeCASReference(invoke) ||
657            IsUnsafeGetAndSetReference(invoke) ||
658            IsVarHandleCASFamily(invoke)) << invoke->GetIntrinsic();
659 
660     __ Bind(GetEntryLabel());
661     if (unpoison_ref_before_marking_) {
662       // Object* ref = ref_addr->AsMirrorPtr()
663       __ MaybeUnpoisonHeapReference(ref_cpu_reg);
664     }
665 
666     // Save the old (unpoisoned) reference.
667     __ movl(temp1_, ref_cpu_reg);
668 
669     // No need to save live registers; it's taken care of by the
670     // entrypoint. Also, there is no need to update the stack mask,
671     // as this runtime call will not trigger a garbage collection.
672     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
673     DCHECK_NE(ref_reg, RSP);
674     DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
675     // "Compact" slow path, saving two moves.
676     //
677     // Instead of using the standard runtime calling convention (input
678     // and output in R0):
679     //
680     //   RDI <- ref
681     //   RAX <- ReadBarrierMark(RDI)
682     //   ref <- RAX
683     //
684     // we just use rX (the register containing `ref`) as input and output
685     // of a dedicated entrypoint:
686     //
687     //   rX <- ReadBarrierMarkRegX(rX)
688     //
689     int32_t entry_point_offset =
690         Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
691     // This runtime call does not require a stack map.
692     x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
693 
694     // If the new reference is different from the old reference,
695     // update the field in the holder (`*field_addr`).
696     //
697     // Note that this field could also hold a different object, if
698     // another thread had concurrently changed it. In that case, the
699     // LOCK CMPXCHGL instruction in the compare-and-set (CAS)
700     // operation below would abort the CAS, leaving the field as-is.
701     NearLabel done;
702     __ cmpl(temp1_, ref_cpu_reg);
703     __ j(kEqual, &done);
704 
705     // Update the holder's field atomically.  This may fail if the
706     // mutator updates the field before us, but that's OK.  This is achieved
707     // using a strong compare-and-set (CAS) operation with relaxed
708     // memory synchronization ordering, where the expected value is
709     // the old reference and the desired value is the new reference.
710     // This operation is implemented with a 32-bit LOCK CMPXCHGL
711     // instruction, which requires the expected value (the old
712     // reference) to be in EAX.  Save RAX beforehand, and move the
713     // expected value (stored in `temp1_`) into EAX.
714     __ movq(temp2_, CpuRegister(RAX));
715     __ movl(CpuRegister(RAX), temp1_);
716 
717     // Convenience aliases.
718     CpuRegister base = obj_;
719     CpuRegister expected = CpuRegister(RAX);
720     CpuRegister value = ref_cpu_reg;
721 
722     bool base_equals_value = (base.AsRegister() == value.AsRegister());
723     Register value_reg = ref_reg;
724     if (kPoisonHeapReferences) {
725       if (base_equals_value) {
726         // If `base` and `value` are the same register location, move
727         // `value_reg` to a temporary register.  This way, poisoning
728         // `value_reg` won't invalidate `base`.
729         value_reg = temp1_.AsRegister();
730         __ movl(CpuRegister(value_reg), base);
731       }
732 
733       // Check that the register allocator did not assign the location
734       // of `expected` (RAX) to `value` nor to `base`, so that heap
735       // poisoning (when enabled) works as intended below.
736       // - If `value` were equal to `expected`, both references would
737       //   be poisoned twice, meaning they would not be poisoned at
738       //   all, as heap poisoning uses address negation.
739       // - If `base` were equal to `expected`, poisoning `expected`
740       //   would invalidate `base`.
741       DCHECK_NE(value_reg, expected.AsRegister());
742       DCHECK_NE(base.AsRegister(), expected.AsRegister());
743 
744       __ PoisonHeapReference(expected);
745       __ PoisonHeapReference(CpuRegister(value_reg));
746     }
747 
748     __ LockCmpxchgl(field_addr_, CpuRegister(value_reg));
749 
750     // If heap poisoning is enabled, we need to unpoison the values
751     // that were poisoned earlier.
752     if (kPoisonHeapReferences) {
753       if (base_equals_value) {
754         // `value_reg` has been moved to a temporary register, no need
755         // to unpoison it.
756       } else {
757         __ UnpoisonHeapReference(CpuRegister(value_reg));
758       }
759       // No need to unpoison `expected` (RAX), as it will be overwritten below.
760     }
761 
762     // Restore RAX.
763     __ movq(CpuRegister(RAX), temp2_);
764 
765     __ Bind(&done);
766     __ jmp(GetExitLabel());
767   }
768 
769  private:
770   // The location (register) of the marked object reference.
771   const Location ref_;
772   // The register containing the object holding the marked object reference field.
773   const CpuRegister obj_;
774   // The address of the marked reference field.  The base of this address must be `obj_`.
775   const Address field_addr_;
776 
777   // Should the reference in `ref_` be unpoisoned prior to marking it?
778   const bool unpoison_ref_before_marking_;
779 
780   const CpuRegister temp1_;
781   const CpuRegister temp2_;
782 
783   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86_64);
784 };
785 
786 // Slow path generating a read barrier for a heap reference.
787 class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
788  public:
789   ReadBarrierForHeapReferenceSlowPathX86_64(HInstruction* instruction,
790                                             Location out,
791                                             Location ref,
792                                             Location obj,
793                                             uint32_t offset,
794                                             Location index)
795       : SlowPathCode(instruction),
796         out_(out),
797         ref_(ref),
798         obj_(obj),
799         offset_(offset),
800         index_(index) {
801     // If `obj` is equal to `out` or `ref`, it means the initial
802     // object has been overwritten by (or after) the heap object
803     // reference load to be instrumented, e.g.:
804     //
805     //   __ movl(out, Address(out, offset));
806     //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
807     //
808     // In that case, we have lost the information about the original
809     // object, and the emitted read barrier cannot work properly.
810     DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
811     DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
812   }
813 
814   void EmitNativeCode(CodeGenerator* codegen) override {
815     DCHECK(codegen->EmitReadBarrier());
816     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
817     LocationSummary* locations = instruction_->GetLocations();
818     CpuRegister reg_out = out_.AsRegister<CpuRegister>();
819     DCHECK(locations->CanCall());
820     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_;
821     DCHECK(instruction_->IsInstanceFieldGet() ||
822            instruction_->IsStaticFieldGet() ||
823            instruction_->IsArrayGet() ||
824            instruction_->IsInstanceOf() ||
825            instruction_->IsCheckCast() ||
826            (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
827         << "Unexpected instruction in read barrier for heap reference slow path: "
828         << instruction_->DebugName();
829 
830     __ Bind(GetEntryLabel());
831     SaveLiveRegisters(codegen, locations);
832 
833     // We may have to change the index's value, but as `index_` is a
834     // constant member (like other "inputs" of this slow path),
835     // introduce a copy of it, `index`.
836     Location index = index_;
837     if (index_.IsValid()) {
838       // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
839       if (instruction_->IsArrayGet()) {
840         // Compute real offset and store it in index_.
841         Register index_reg = index_.AsRegister<CpuRegister>().AsRegister();
842         DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
843         if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
844           // We are about to change the value of `index_reg` (see the
845           // calls to art::x86_64::X86_64Assembler::shll and
846           // art::x86_64::X86_64Assembler::AddImmediate below), but it
847           // has not been saved by the previous call to
848           // art::SlowPathCode::SaveLiveRegisters, as it is a
849           // callee-save register --
850           // art::SlowPathCode::SaveLiveRegisters does not consider
851           // callee-save registers, as it has been designed with the
852           // assumption that callee-save registers are supposed to be
853           // handled by the called function.  So, as a callee-save
854           // register, `index_reg` _would_ eventually be saved onto
855           // the stack, but it would be too late: we would have
856           // changed its value earlier.  Therefore, we manually save
857           // it here into another freely available register,
858           // `free_reg`, chosen of course among the caller-save
859           // registers (as a callee-save `free_reg` register would
860           // exhibit the same problem).
861           //
862           // Note we could have requested a temporary register from
863           // the register allocator instead; but we prefer not to, as
864           // this is a slow path, and we know we can find a
865           // caller-save register that is available.
866           Register free_reg = FindAvailableCallerSaveRegister(codegen).AsRegister();
867           __ movl(CpuRegister(free_reg), CpuRegister(index_reg));
868           index_reg = free_reg;
869           index = Location::RegisterLocation(index_reg);
870         } else {
871           // The initial register stored in `index_` has already been
872           // saved in the call to art::SlowPathCode::SaveLiveRegisters
873           // (as it is not a callee-save register), so we can freely
874           // use it.
875         }
876         // Shifting the index value contained in `index_reg` by the
877         // scale factor (2) cannot overflow in practice, as the
878         // runtime is unable to allocate object arrays with a size
879         // larger than 2^26 - 1 (that is, 2^28 - 4 bytes).
880         __ shll(CpuRegister(index_reg), Immediate(TIMES_4));
881         static_assert(
882             sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
883             "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
884         __ AddImmediate(CpuRegister(index_reg), Immediate(offset_));
885       } else {
886         // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
887         // intrinsics, `index_` is not shifted by a scale factor of 2
888         // (as in the case of ArrayGet), as it is actually an offset
889         // to an object field within an object.
890         DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
891         DCHECK(instruction_->GetLocations()->Intrinsified());
892         DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
893                (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile) ||
894                (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetReference) ||
895                (instruction_->AsInvoke()->GetIntrinsic() ==
896                     Intrinsics::kJdkUnsafeGetReferenceVolatile) ||
897                (instruction_->AsInvoke()->GetIntrinsic() ==
898                     Intrinsics::kJdkUnsafeGetReferenceAcquire))
899             << instruction_->AsInvoke()->GetIntrinsic();
900         DCHECK_EQ(offset_, 0U);
901         DCHECK(index_.IsRegister());
902       }
903     }
904 
905     // We're moving two or three locations to locations that could
906     // overlap, so we need a parallel move resolver.
907     InvokeRuntimeCallingConvention calling_convention;
908     HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
909     parallel_move.AddMove(ref_,
910                           Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
911                           DataType::Type::kReference,
912                           nullptr);
913     parallel_move.AddMove(obj_,
914                           Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
915                           DataType::Type::kReference,
916                           nullptr);
917     if (index.IsValid()) {
918       parallel_move.AddMove(index,
919                             Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
920                             DataType::Type::kInt32,
921                             nullptr);
922       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
923     } else {
924       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
925       __ movl(CpuRegister(calling_convention.GetRegisterAt(2)), Immediate(offset_));
926     }
927     x86_64_codegen->InvokeRuntime(kQuickReadBarrierSlow,
928                                   instruction_,
929                                   instruction_->GetDexPc(),
930                                   this);
931     CheckEntrypointTypes<
932         kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
933     x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
934 
935     RestoreLiveRegisters(codegen, locations);
936     __ jmp(GetExitLabel());
937   }
938 
939   const char* GetDescription() const override {
940     return "ReadBarrierForHeapReferenceSlowPathX86_64";
941   }
942 
943  private:
944   CpuRegister FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
945     size_t ref = static_cast<int>(ref_.AsRegister<CpuRegister>().AsRegister());
946     size_t obj = static_cast<int>(obj_.AsRegister<CpuRegister>().AsRegister());
947     for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
948       if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
949         return static_cast<CpuRegister>(i);
950       }
951     }
952     // We shall never fail to find a free caller-save register, as
953     // there are more than two core caller-save registers on x86-64
954     // (meaning it is possible to find one which is different from
955     // `ref` and `obj`).
956     DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
957     LOG(FATAL) << "Could not find a free caller-save register";
958     UNREACHABLE();
959   }
960 
961   const Location out_;
962   const Location ref_;
963   const Location obj_;
964   const uint32_t offset_;
965   // An additional location containing an index to an array.
966   // Only used for HArrayGet and the UnsafeGetObject &
967   // UnsafeGetObjectVolatile intrinsics.
968   const Location index_;
969 
970   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86_64);
971 };
972 
973 // Slow path generating a read barrier for a GC root.
974 class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode {
975  public:
976   ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root)
977       : SlowPathCode(instruction), out_(out), root_(root) {
978   }
979 
980   void EmitNativeCode(CodeGenerator* codegen) override {
981     DCHECK(codegen->EmitReadBarrier());
982     LocationSummary* locations = instruction_->GetLocations();
983     DCHECK(locations->CanCall());
984     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
985     DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
986         << "Unexpected instruction in read barrier for GC root slow path: "
987         << instruction_->DebugName();
988 
989     __ Bind(GetEntryLabel());
990     SaveLiveRegisters(codegen, locations);
991 
992     InvokeRuntimeCallingConvention calling_convention;
993     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
994     x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
995     x86_64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
996                                   instruction_,
997                                   instruction_->GetDexPc(),
998                                   this);
999     CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
1000     x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
1001 
1002     RestoreLiveRegisters(codegen, locations);
1003     __ jmp(GetExitLabel());
1004   }
1005 
1006   const char* GetDescription() const override { return "ReadBarrierForRootSlowPathX86_64"; }
1007 
1008  private:
1009   const Location out_;
1010   const Location root_;
1011 
1012   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86_64);
1013 };
1014 
1015 class MethodEntryExitHooksSlowPathX86_64 : public SlowPathCode {
1016  public:
1017   explicit MethodEntryExitHooksSlowPathX86_64(HInstruction* instruction)
1018       : SlowPathCode(instruction) {}
1019 
1020   void EmitNativeCode(CodeGenerator* codegen) override {
1021     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
1022     LocationSummary* locations = instruction_->GetLocations();
1023     QuickEntrypointEnum entry_point =
1024         (instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook;
1025     __ Bind(GetEntryLabel());
1026     SaveLiveRegisters(codegen, locations);
1027     if (instruction_->IsMethodExitHook()) {
1028       // Load FrameSize to pass to the exit hook.
1029       __ movq(CpuRegister(R8), Immediate(codegen->GetFrameSize()));
1030     }
1031     x86_64_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this);
1032     RestoreLiveRegisters(codegen, locations);
1033     __ jmp(GetExitLabel());
1034   }
1035 
1036   const char* GetDescription() const override {
1037     return "MethodEntryExitHooksSlowPath";
1038   }
1039 
1040  private:
1041   DISALLOW_COPY_AND_ASSIGN(MethodEntryExitHooksSlowPathX86_64);
1042 };
1043 
1044 class CompileOptimizedSlowPathX86_64 : public SlowPathCode {
1045  public:
1046   CompileOptimizedSlowPathX86_64(HSuspendCheck* suspend_check, uint64_t counter_address)
1047       : SlowPathCode(suspend_check),
1048         counter_address_(counter_address) {}
1049 
1050   void EmitNativeCode(CodeGenerator* codegen) override {
1051     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
1052     __ Bind(GetEntryLabel());
1053     __ movq(CpuRegister(TMP), Immediate(counter_address_));
1054     __ movw(Address(CpuRegister(TMP), 0), Immediate(ProfilingInfo::GetOptimizeThreshold()));
1055     if (instruction_ != nullptr) {
1056       // Only saves full width XMM for SIMD.
1057       SaveLiveRegisters(codegen, instruction_->GetLocations());
1058     }
1059     x86_64_codegen->GenerateInvokeRuntime(
1060         GetThreadOffset<kX86_64PointerSize>(kQuickCompileOptimized).Int32Value());
1061     if (instruction_ != nullptr) {
1062       // Only restores full width XMM for SIMD.
1063       RestoreLiveRegisters(codegen, instruction_->GetLocations());
1064     }
1065     __ jmp(GetExitLabel());
1066   }
1067 
1068   const char* GetDescription() const override {
1069     return "CompileOptimizedSlowPath";
1070   }
1071 
1072  private:
1073   uint64_t counter_address_;
1074 
1075   DISALLOW_COPY_AND_ASSIGN(CompileOptimizedSlowPathX86_64);
1076 };
1077 
1078 #undef __
1079 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
1080 #define __ down_cast<X86_64Assembler*>(GetAssembler())->  // NOLINT
1081 
1082 inline Condition X86_64IntegerCondition(IfCondition cond) {
1083   switch (cond) {
1084     case kCondEQ: return kEqual;
1085     case kCondNE: return kNotEqual;
1086     case kCondLT: return kLess;
1087     case kCondLE: return kLessEqual;
1088     case kCondGT: return kGreater;
1089     case kCondGE: return kGreaterEqual;
1090     case kCondB:  return kBelow;
1091     case kCondBE: return kBelowEqual;
1092     case kCondA:  return kAbove;
1093     case kCondAE: return kAboveEqual;
1094   }
1095   LOG(FATAL) << "Unreachable";
1096   UNREACHABLE();
1097 }
1098 
1099 // Maps FP condition to x86_64 name.
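// Note: x86-64 FP compares (ucomiss/ucomisd) set the flags as for an unsigned comparison,
// which is why the signed conditions map to the below/above conditions here. The unordered
// (NaN) case sets ZF, PF and CF and must be handled separately by the caller.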
1100 inline Condition X86_64FPCondition(IfCondition cond) {
1101   switch (cond) {
1102     case kCondEQ: return kEqual;
1103     case kCondNE: return kNotEqual;
1104     case kCondLT: return kBelow;
1105     case kCondLE: return kBelowEqual;
1106     case kCondGT: return kAbove;
1107     case kCondGE: return kAboveEqual;
1108     default:      break;  // should not happen
1109   }
1110   LOG(FATAL) << "Unreachable";
1111   UNREACHABLE();
1112 }
1113 
1114 void CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(LocationSummary* locations) {
1115   // We have to ensure that the native code we call directly (such as @CriticalNative
1116   // or some intrinsic helpers, say Math.sin()) doesn't clobber the XMM registers
1117   // which are non-volatile for ART, but volatile for Native calls.  This will ensure
1118   // that they are saved in the prologue and properly restored.
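  // Note: in the native System V x86-64 ABI all XMM registers are caller-save, whereas ART
  // treats XMM12-XMM15 (see kFpuCalleeSaves above) as callee-save, hence the mismatch
  // handled here.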
1119   for (FloatRegister fp_reg : non_volatile_xmm_regs) {
1120     locations->AddTemp(Location::FpuRegisterLocation(fp_reg));
1121   }
1122 }
1123 
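// On x86-64 all dispatch kinds are supported directly, so the desired dispatch info is
// returned unchanged.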
1124 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStaticOrDirectDispatch(
1125     const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
1126     [[maybe_unused]] ArtMethod* method) {
1127   return desired_dispatch_info;
1128 }
1129 
1130 void CodeGeneratorX86_64::LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke) {
1131   switch (load_kind) {
1132     case MethodLoadKind::kBootImageLinkTimePcRelative:
1133       DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
1134       __ leal(temp.AsRegister<CpuRegister>(),
1135               Address::Absolute(kPlaceholder32BitOffset, /* no_rip= */ false));
1136       RecordBootImageMethodPatch(invoke);
1137       break;
1138     case MethodLoadKind::kBootImageRelRo: {
1139       // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load.
1140       __ movl(temp.AsRegister<CpuRegister>(),
1141               Address::Absolute(kPlaceholder32BitOffset, /* no_rip= */ false));
1142       RecordBootImageRelRoPatch(GetBootImageOffset(invoke));
1143       break;
1144     }
1145     case MethodLoadKind::kBssEntry: {
1146       __ movq(temp.AsRegister<CpuRegister>(),
1147               Address::Absolute(kPlaceholder32BitOffset, /* no_rip= */ false));
1148       RecordMethodBssEntryPatch(invoke);
1149       // No need for memory fence, thanks to the x86-64 memory model.
1150       break;
1151     }
1152     case MethodLoadKind::kJitDirectAddress: {
1153       Load64BitValue(temp.AsRegister<CpuRegister>(),
1154                      reinterpret_cast<int64_t>(invoke->GetResolvedMethod()));
1155       break;
1156     }
1157     case MethodLoadKind::kRuntimeCall: {
1158       // Test situation, don't do anything.
1159       break;
1160     }
1161     default: {
1162       LOG(FATAL) << "Load kind should have already been handled " << load_kind;
1163       UNREACHABLE();
1164     }
1165   }
1166 }
1167 
1168 void CodeGeneratorX86_64::GenerateStaticOrDirectCall(
1169     HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
1170   // All registers are assumed to be correctly set up.
1171 
1172   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
1173   switch (invoke->GetMethodLoadKind()) {
1174     case MethodLoadKind::kStringInit: {
1175       // temp = thread->string_init_entrypoint
1176       uint32_t offset =
1177           GetThreadOffset<kX86_64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
1178       __ gs()->movq(temp.AsRegister<CpuRegister>(), Address::Absolute(offset, /* no_rip= */ true));
1179       break;
1180     }
1181     case MethodLoadKind::kRecursive: {
1182       callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex());
1183       break;
1184     }
1185     case MethodLoadKind::kRuntimeCall: {
1186       GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
1187       return;  // No code pointer retrieval; the runtime performs the call directly.
1188     }
1189     case MethodLoadKind::kBootImageLinkTimePcRelative:
1190       // For kCallCriticalNative we skip loading the method and do the call directly.
1191       if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
1192         break;
1193       }
1194       FALLTHROUGH_INTENDED;
1195     default: {
1196       LoadMethod(invoke->GetMethodLoadKind(), temp, invoke);
1197       break;
1198     }
1199   }
1200 
1201   switch (invoke->GetCodePtrLocation()) {
1202     case CodePtrLocation::kCallSelf:
1203       DCHECK(!GetGraph()->HasShouldDeoptimizeFlag());
1204       __ call(&frame_entry_label_);
1205       RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
1206       break;
1207     case CodePtrLocation::kCallCriticalNative: {
1208       size_t out_frame_size =
1209           PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorX86_64,
1210                                     kNativeStackAlignment,
1211                                     GetCriticalNativeDirectCallFrameSize>(invoke);
1212       if (invoke->GetMethodLoadKind() == MethodLoadKind::kBootImageLinkTimePcRelative) {
1213         DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
1214         __ call(Address::Absolute(kPlaceholder32BitOffset, /* no_rip= */ false));
1215         RecordBootImageJniEntrypointPatch(invoke);
1216       } else {
1217         // (callee_method + offset_of_jni_entry_point)()
1218         __ call(Address(callee_method.AsRegister<CpuRegister>(),
1219                          ArtMethod::EntryPointFromJniOffset(kX86_64PointerSize).SizeValue()));
1220       }
1221       RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
1222       // Zero-/sign-extend the result when needed due to native and managed ABI mismatch.
1223       switch (invoke->GetType()) {
1224         case DataType::Type::kBool:
1225           __ movzxb(CpuRegister(RAX), CpuRegister(RAX));
1226           break;
1227         case DataType::Type::kInt8:
1228           __ movsxb(CpuRegister(RAX), CpuRegister(RAX));
1229           break;
1230         case DataType::Type::kUint16:
1231           __ movzxw(CpuRegister(RAX), CpuRegister(RAX));
1232           break;
1233         case DataType::Type::kInt16:
1234           __ movsxw(CpuRegister(RAX), CpuRegister(RAX));
1235           break;
1236         case DataType::Type::kInt32:
1237         case DataType::Type::kInt64:
1238         case DataType::Type::kFloat32:
1239         case DataType::Type::kFloat64:
1240         case DataType::Type::kVoid:
1241           break;
1242         default:
1243           DCHECK(false) << invoke->GetType();
1244           break;
1245       }
1246       if (out_frame_size != 0u) {
1247         DecreaseFrame(out_frame_size);
1248       }
1249       break;
1250     }
1251     case CodePtrLocation::kCallArtMethod:
1252       // (callee_method + offset_of_quick_compiled_code)()
1253       __ call(Address(callee_method.AsRegister<CpuRegister>(),
1254                       ArtMethod::EntryPointFromQuickCompiledCodeOffset(
1255                           kX86_64PointerSize).SizeValue()));
1256       RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
1257       break;
1258   }
1259 
1260   DCHECK(!IsLeafMethod());
1261 }
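// A brief sketch of why the zero-/sign-extension above is needed for @CriticalNative calls:
// the native ABI may leave the upper bits of the return register unspecified for sub-word
// types, while managed code expects a normalized 32-bit value in EAX. Illustrative only:
//
//   kBool / kUint16:  movzx  -> upper bits cleared
//   kInt8 / kInt16:   movsx  -> sign bit replicated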
1262 
1263 void CodeGeneratorX86_64::GenerateVirtualCall(
1264     HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
1265   CpuRegister temp = temp_in.AsRegister<CpuRegister>();
1266   size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
1267       invoke->GetVTableIndex(), kX86_64PointerSize).SizeValue();
1268 
1269   // Use the calling convention instead of the location of the receiver, as
1270   // intrinsics may have put the receiver in a different register. In the intrinsics
1271   // slow path, the arguments have been moved to the right place, so here we are
1272   // guaranteed that the receiver is the first register of the calling convention.
1273   InvokeDexCallingConvention calling_convention;
1274   Register receiver = calling_convention.GetRegisterAt(0);
1275 
1276   size_t class_offset = mirror::Object::ClassOffset().SizeValue();
1277   // /* HeapReference<Class> */ temp = receiver->klass_
1278   __ movl(temp, Address(CpuRegister(receiver), class_offset));
1279   MaybeRecordImplicitNullCheck(invoke);
1280   // Instead of simply (possibly) unpoisoning `temp` here, we should
1281   // emit a read barrier for the previous class reference load.
1282   // However this is not required in practice, as this is an
1283   // intermediate/temporary reference and because the current
1284   // concurrent copying collector keeps the from-space memory
1285   // intact/accessible until the end of the marking phase (though the
1286   // concurrent copying collector may not do so in the future).
1287   __ MaybeUnpoisonHeapReference(temp);
1288 
1289   MaybeGenerateInlineCacheCheck(invoke, temp);
1290 
1291   // temp = temp->GetMethodAt(method_offset);
1292   __ movq(temp, Address(temp, method_offset));
1293   // call temp->GetEntryPoint();
1294   __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(
1295       kX86_64PointerSize).SizeValue()));
1296   RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
1297 }
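// Rough shape of the virtual dispatch emitted above (registers are examples; the actual
// offsets depend on the class layout and pointer size):
//
//   movl temp, [receiver + class_offset]          // temp = receiver->klass_
//   movq temp, [temp + vtable_entry_offset]       // temp = ArtMethod* from the embedded vtable
//   call [temp + quick_entry_point_offset]        // invoke the method's compiled code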
1298 
1299 void CodeGeneratorX86_64::RecordBootImageIntrinsicPatch(uint32_t intrinsic_data) {
1300   boot_image_other_patches_.emplace_back(/* target_dex_file= */ nullptr, intrinsic_data);
1301   __ Bind(&boot_image_other_patches_.back().label);
1302 }
1303 
1304 void CodeGeneratorX86_64::RecordBootImageRelRoPatch(uint32_t boot_image_offset) {
1305   boot_image_other_patches_.emplace_back(/* target_dex_file= */ nullptr, boot_image_offset);
1306   __ Bind(&boot_image_other_patches_.back().label);
1307 }
1308 
1309 void CodeGeneratorX86_64::RecordBootImageMethodPatch(HInvoke* invoke) {
1310   boot_image_method_patches_.emplace_back(invoke->GetResolvedMethodReference().dex_file,
1311                                           invoke->GetResolvedMethodReference().index);
1312   __ Bind(&boot_image_method_patches_.back().label);
1313 }
1314 
1315 void CodeGeneratorX86_64::RecordMethodBssEntryPatch(HInvoke* invoke) {
1316   DCHECK(IsSameDexFile(GetGraph()->GetDexFile(), *invoke->GetMethodReference().dex_file) ||
1317          GetCompilerOptions().WithinOatFile(invoke->GetMethodReference().dex_file) ||
1318          ContainsElement(Runtime::Current()->GetClassLinker()->GetBootClassPath(),
1319                          invoke->GetMethodReference().dex_file));
1320   method_bss_entry_patches_.emplace_back(invoke->GetMethodReference().dex_file,
1321                                          invoke->GetMethodReference().index);
1322   __ Bind(&method_bss_entry_patches_.back().label);
1323 }
1324 
1325 void CodeGeneratorX86_64::RecordBootImageTypePatch(const DexFile& dex_file,
1326                                                    dex::TypeIndex type_index) {
1327   boot_image_type_patches_.emplace_back(&dex_file, type_index.index_);
1328   __ Bind(&boot_image_type_patches_.back().label);
1329 }
1330 
1331 void CodeGeneratorX86_64::RecordAppImageTypePatch(const DexFile& dex_file,
1332                                                   dex::TypeIndex type_index) {
1333   app_image_type_patches_.emplace_back(&dex_file, type_index.index_);
1334   __ Bind(&app_image_type_patches_.back().label);
1335 }
1336 
1337 Label* CodeGeneratorX86_64::NewTypeBssEntryPatch(HLoadClass* load_class) {
1338   ArenaDeque<PatchInfo<Label>>* patches = nullptr;
1339   switch (load_class->GetLoadKind()) {
1340     case HLoadClass::LoadKind::kBssEntry:
1341       patches = &type_bss_entry_patches_;
1342       break;
1343     case HLoadClass::LoadKind::kBssEntryPublic:
1344       patches = &public_type_bss_entry_patches_;
1345       break;
1346     case HLoadClass::LoadKind::kBssEntryPackage:
1347       patches = &package_type_bss_entry_patches_;
1348       break;
1349     default:
1350       LOG(FATAL) << "Unexpected load kind: " << load_class->GetLoadKind();
1351       UNREACHABLE();
1352   }
1353   patches->emplace_back(&load_class->GetDexFile(), load_class->GetTypeIndex().index_);
1354   return &patches->back().label;
1355 }
1356 
1357 void CodeGeneratorX86_64::RecordBootImageStringPatch(HLoadString* load_string) {
1358   boot_image_string_patches_.emplace_back(
1359       &load_string->GetDexFile(), load_string->GetStringIndex().index_);
1360   __ Bind(&boot_image_string_patches_.back().label);
1361 }
1362 
1363 Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) {
1364   string_bss_entry_patches_.emplace_back(
1365       &load_string->GetDexFile(), load_string->GetStringIndex().index_);
1366   return &string_bss_entry_patches_.back().label;
1367 }
1368 
1369 Label* CodeGeneratorX86_64::NewMethodTypeBssEntryPatch(HLoadMethodType* load_method_type) {
1370   method_type_bss_entry_patches_.emplace_back(
1371       &load_method_type->GetDexFile(), load_method_type->GetProtoIndex().index_);
1372   return &method_type_bss_entry_patches_.back().label;
1373 }
1374 
1375 void CodeGeneratorX86_64::RecordBootImageJniEntrypointPatch(HInvokeStaticOrDirect* invoke) {
1376   boot_image_jni_entrypoint_patches_.emplace_back(invoke->GetResolvedMethodReference().dex_file,
1377                                                   invoke->GetResolvedMethodReference().index);
1378   __ Bind(&boot_image_jni_entrypoint_patches_.back().label);
1379 }
1380 
1381 void CodeGeneratorX86_64::LoadBootImageAddress(CpuRegister reg, uint32_t boot_image_reference) {
1382   if (GetCompilerOptions().IsBootImage()) {
1383     __ leal(reg,
1384             Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
1385     RecordBootImageIntrinsicPatch(boot_image_reference);
1386   } else if (GetCompilerOptions().GetCompilePic()) {
1387     __ movl(reg,
1388             Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
1389     RecordBootImageRelRoPatch(boot_image_reference);
1390   } else {
1391     DCHECK(GetCompilerOptions().IsJitCompiler());
1392     gc::Heap* heap = Runtime::Current()->GetHeap();
1393     DCHECK(!heap->GetBootImageSpaces().empty());
1394     const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference;
1395     __ movl(reg, Immediate(dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(address))));
1396   }
1397 }
1398 
1399 void CodeGeneratorX86_64::LoadIntrinsicDeclaringClass(CpuRegister reg, HInvoke* invoke) {
1400   DCHECK_NE(invoke->GetIntrinsic(), Intrinsics::kNone);
1401   if (GetCompilerOptions().IsBootImage()) {
1402     // Load the type the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
1403     __ leal(reg,
1404             Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
1405     MethodReference target_method = invoke->GetResolvedMethodReference();
1406     dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
1407     boot_image_type_patches_.emplace_back(target_method.dex_file, type_idx.index_);
1408     __ Bind(&boot_image_type_patches_.back().label);
1409   } else {
1410     uint32_t boot_image_offset = GetBootImageOffsetOfIntrinsicDeclaringClass(invoke);
1411     LoadBootImageAddress(reg, boot_image_offset);
1412   }
1413 }
1414 
1415 void CodeGeneratorX86_64::LoadClassRootForIntrinsic(CpuRegister reg, ClassRoot class_root) {
1416   if (GetCompilerOptions().IsBootImage()) {
1417     ScopedObjectAccess soa(Thread::Current());
1418     ObjPtr<mirror::Class> klass = GetClassRoot(class_root);
1419     boot_image_type_patches_.emplace_back(&klass->GetDexFile(), klass->GetDexTypeIndex().index_);
1420     __ Bind(&boot_image_type_patches_.back().label);
1421   } else {
1422     uint32_t boot_image_offset = GetBootImageOffset(class_root);
1423     LoadBootImageAddress(reg, boot_image_offset);
1424   }
1425 }
1426 
1427 // The label points to the end of the "movl" (or another) instruction, but the literal offset
1428 // for a method patch needs to point to the embedded constant, which occupies the last 4 bytes.
1429 constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
1430 
1431 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
1432 inline void CodeGeneratorX86_64::EmitPcRelativeLinkerPatches(
1433     const ArenaDeque<PatchInfo<Label>>& infos,
1434     ArenaVector<linker::LinkerPatch>* linker_patches) {
1435   for (const PatchInfo<Label>& info : infos) {
1436     uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
1437     linker_patches->push_back(
1438         Factory(literal_offset, info.target_dex_file, info.label.Position(), info.offset_or_index));
1439   }
1440 }
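// Worked example of the adjustment above (offsets are hypothetical): if a patched `movl` ends
// at assembler position 0x40, its 32-bit immediate occupies bytes 0x3C..0x3F, so the linker
// patch is given literal_offset = 0x40 - 4 = 0x3C, while the label position (0x40) is passed
// as the PC used to resolve the RIP-relative displacement.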
1441 
1442 template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
1443 linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
1444                                      const DexFile* target_dex_file,
1445                                      uint32_t pc_insn_offset,
1446                                      uint32_t boot_image_offset) {
1447   DCHECK(target_dex_file == nullptr);  // Unused for these patches, should be null.
1448   return Factory(literal_offset, pc_insn_offset, boot_image_offset);
1449 }
1450 
1451 void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
1452   DCHECK(linker_patches->empty());
1453   size_t size =
1454       boot_image_method_patches_.size() +
1455       method_bss_entry_patches_.size() +
1456       boot_image_type_patches_.size() +
1457       app_image_type_patches_.size() +
1458       type_bss_entry_patches_.size() +
1459       public_type_bss_entry_patches_.size() +
1460       package_type_bss_entry_patches_.size() +
1461       boot_image_string_patches_.size() +
1462       string_bss_entry_patches_.size() +
1463       method_type_bss_entry_patches_.size() +
1464       boot_image_jni_entrypoint_patches_.size() +
1465       boot_image_other_patches_.size();
1466   linker_patches->reserve(size);
1467   if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) {
1468     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
1469         boot_image_method_patches_, linker_patches);
1470     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
1471         boot_image_type_patches_, linker_patches);
1472     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
1473         boot_image_string_patches_, linker_patches);
1474   } else {
1475     DCHECK(boot_image_method_patches_.empty());
1476     DCHECK(boot_image_type_patches_.empty());
1477     DCHECK(boot_image_string_patches_.empty());
1478   }
1479   DCHECK_IMPLIES(!GetCompilerOptions().IsAppImage(), app_image_type_patches_.empty());
1480   if (GetCompilerOptions().IsBootImage()) {
1481     EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
1482         boot_image_other_patches_, linker_patches);
1483   } else {
1484     EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::BootImageRelRoPatch>>(
1485         boot_image_other_patches_, linker_patches);
1486     EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeAppImageRelRoPatch>(
1487         app_image_type_patches_, linker_patches);
1488   }
1489   EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
1490       method_bss_entry_patches_, linker_patches);
1491   EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
1492       type_bss_entry_patches_, linker_patches);
1493   EmitPcRelativeLinkerPatches<linker::LinkerPatch::PublicTypeBssEntryPatch>(
1494       public_type_bss_entry_patches_, linker_patches);
1495   EmitPcRelativeLinkerPatches<linker::LinkerPatch::PackageTypeBssEntryPatch>(
1496       package_type_bss_entry_patches_, linker_patches);
1497   EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
1498       string_bss_entry_patches_, linker_patches);
1499   EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodTypeBssEntryPatch>(
1500       method_type_bss_entry_patches_, linker_patches);
1501   EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeJniEntrypointPatch>(
1502       boot_image_jni_entrypoint_patches_, linker_patches);
1503   DCHECK_EQ(size, linker_patches->size());
1504 }
1505 
1506 void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const {
1507   stream << Register(reg);
1508 }
1509 
1510 void CodeGeneratorX86_64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
1511   stream << FloatRegister(reg);
1512 }
1513 
1514 const X86_64InstructionSetFeatures& CodeGeneratorX86_64::GetInstructionSetFeatures() const {
1515   return *GetCompilerOptions().GetInstructionSetFeatures()->AsX86_64InstructionSetFeatures();
1516 }
1517 
1518 size_t CodeGeneratorX86_64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
1519   __ movq(Address(CpuRegister(RSP), stack_index), CpuRegister(reg_id));
1520   return kX86_64WordSize;
1521 }
1522 
1523 size_t CodeGeneratorX86_64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
1524   __ movq(CpuRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1525   return kX86_64WordSize;
1526 }
1527 
1528 size_t CodeGeneratorX86_64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1529   if (GetGraph()->HasSIMD()) {
1530     __ movups(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
1531   } else {
1532     __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
1533   }
1534   return GetSlowPathFPWidth();
1535 }
1536 
1537 size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1538   if (GetGraph()->HasSIMD()) {
1539     __ movups(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1540   } else {
1541     __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1542   }
1543   return GetSlowPathFPWidth();
1544 }
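// Note on the SIMD distinction above (a sketch of the rationale): when the graph contains SIMD
// code, slow paths must preserve the full 128-bit XMM contents, hence the unaligned `movups`;
// otherwise only the low 64 bits can hold a live double, so the cheaper `movsd` suffices.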
1545 
1546 void CodeGeneratorX86_64::InvokeRuntime(QuickEntrypointEnum entrypoint,
1547                                         HInstruction* instruction,
1548                                         uint32_t dex_pc,
1549                                         SlowPathCode* slow_path) {
1550   ValidateInvokeRuntime(entrypoint, instruction, slow_path);
1551   GenerateInvokeRuntime(GetThreadOffset<kX86_64PointerSize>(entrypoint).Int32Value());
1552   if (EntrypointRequiresStackMap(entrypoint)) {
1553     RecordPcInfo(instruction, dex_pc, slow_path);
1554   }
1555 }
1556 
1557 void CodeGeneratorX86_64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
1558                                                               HInstruction* instruction,
1559                                                               SlowPathCode* slow_path) {
1560   ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
1561   GenerateInvokeRuntime(entry_point_offset);
1562 }
1563 
1564 void CodeGeneratorX86_64::GenerateInvokeRuntime(int32_t entry_point_offset) {
1565   __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip= */ true));
1566 }
1567 
1568 namespace detail {
1569 
1570 // Mark which intrinsics we don't have handcrafted code for.
1571 template <Intrinsics T>
1572 struct IsUnimplemented {
1573   bool is_unimplemented = false;
1574 };
1575 
1576 #define TRUE_OVERRIDE(Name)                     \
1577   template <>                                   \
1578   struct IsUnimplemented<Intrinsics::k##Name> { \
1579     bool is_unimplemented = true;               \
1580   };
1581 UNIMPLEMENTED_INTRINSIC_LIST_X86_64(TRUE_OVERRIDE)
1582 #undef TRUE_OVERRIDE
1583 
1584 static constexpr bool kIsIntrinsicUnimplemented[] = {
1585     false,  // kNone
1586 #define IS_UNIMPLEMENTED(Intrinsic, ...) \
1587     IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented,
1588     ART_INTRINSICS_LIST(IS_UNIMPLEMENTED)
1589 #undef IS_UNIMPLEMENTED
1590 };
1591 
1592 }  // namespace detail
1593 
1594 static constexpr int kNumberOfCpuRegisterPairs = 0;
1595 // Use a fake return address register to mimic Quick.
1596 static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1);
1597 CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
1598                                          const CompilerOptions& compiler_options,
1599                                          OptimizingCompilerStats* stats)
1600     : CodeGenerator(graph,
1601                     kNumberOfCpuRegisters,
1602                     kNumberOfFloatRegisters,
1603                     kNumberOfCpuRegisterPairs,
1604                     ComputeRegisterMask(kCoreCalleeSaves, arraysize(kCoreCalleeSaves))
1605                         | (1 << kFakeReturnRegister),
1606                     ComputeRegisterMask(kFpuCalleeSaves, arraysize(kFpuCalleeSaves)),
1607                     compiler_options,
1608                     stats,
1609                     ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)),
1610       block_labels_(nullptr),
1611       location_builder_(graph, this),
1612       instruction_visitor_(graph, this),
1613       move_resolver_(graph->GetAllocator(), this),
1614       assembler_(graph->GetAllocator(),
1615                  compiler_options.GetInstructionSetFeatures()->AsX86_64InstructionSetFeatures()),
1616       constant_area_start_(0),
1617       boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1618       method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1619       boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1620       app_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1621       type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1622       public_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1623       package_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1624       boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1625       string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1626       method_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1627       boot_image_jni_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1628       boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1629       jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1630       jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1631       fixups_to_jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
1632   AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
1633 }
1634 
1635 InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph,
1636                                                                CodeGeneratorX86_64* codegen)
1637       : InstructionCodeGenerator(graph, codegen),
1638         assembler_(codegen->GetAssembler()),
1639         codegen_(codegen) {}
1640 
1641 void CodeGeneratorX86_64::SetupBlockedRegisters() const {
1642   // Stack register is always reserved.
1643   blocked_core_registers_[RSP] = true;
1644 
1645   // Block the register used as TMP.
1646   blocked_core_registers_[TMP] = true;
1647 }
1648 
1649 static dwarf::Reg DWARFReg(Register reg) {
1650   return dwarf::Reg::X86_64Core(static_cast<int>(reg));
1651 }
1652 
1653 static dwarf::Reg DWARFReg(FloatRegister reg) {
1654   return dwarf::Reg::X86_64Fp(static_cast<int>(reg));
1655 }
1656 
1657 void LocationsBuilderX86_64::VisitMethodEntryHook(HMethodEntryHook* method_hook) {
1658   LocationSummary* locations = new (GetGraph()->GetAllocator())
1659       LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
1660   // We use rdtsc to record the timestamp for method profiling. rdtsc returns
1661   // two 32-bit values in EAX + EDX even on 64-bit architectures.
1662   locations->AddTemp(Location::RegisterLocation(RAX));
1663   locations->AddTemp(Location::RegisterLocation(RDX));
1664 }
1665 
1666 void InstructionCodeGeneratorX86_64::GenerateMethodEntryExitHook(HInstruction* instruction) {
1667   SlowPathCode* slow_path =
1668       new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathX86_64(instruction);
1669   LocationSummary* locations = instruction->GetLocations();
1670   codegen_->AddSlowPath(slow_path);
1671 
1672   if (instruction->IsMethodExitHook()) {
1673     // Check if the caller needs a deoptimization. Strictly speaking, it would be sufficient
1674     // to test only the CheckCallerForDeopt bit, but it is faster to test the whole flag for
1675     // a non-zero value. The kCHA bit isn't used in debuggable runtimes because CHA
1676     // optimization is disabled there. The other bit is set when this method itself requires
1677     // a deoptimization due to redefinition, so checking for any non-zero value is safe here.
1678     __ cmpl(Address(CpuRegister(RSP), codegen_->GetStackOffsetOfShouldDeoptimizeFlag()),
1679             Immediate(0));
1680     __ j(kNotEqual, slow_path->GetEntryLabel());
1681   }
1682 
1683   uint64_t address = reinterpret_cast64<uint64_t>(Runtime::Current()->GetInstrumentation());
1684   MemberOffset offset = instruction->IsMethodExitHook()
1685       ? instrumentation::Instrumentation::HaveMethodExitListenersOffset()
1686       : instrumentation::Instrumentation::HaveMethodEntryListenersOffset();
1687   __ movq(CpuRegister(TMP), Immediate(address + offset.Int32Value()));
1688   __ cmpb(Address(CpuRegister(TMP), 0),
1689           Immediate(instrumentation::Instrumentation::kFastTraceListeners));
1690   // Check if there are any method entry / exit listeners. If not, continue with execution.
1691   __ j(kLess, slow_path->GetExitLabel());
1692   // Check if there are any slow method entry / exit listeners. If so, take the slow path.
1693   __ j(kGreater, slow_path->GetEntryLabel());
1694 
1695   // Check if there is space in the buffer for a new entry; if not, take the slow path.
1696   CpuRegister index = locations->GetTemp(0).AsRegister<CpuRegister>();
1697   CpuRegister entry_addr = CpuRegister(TMP);
1698   uint64_t trace_buffer_index_offset =
1699       Thread::TraceBufferIndexOffset<kX86_64PointerSize>().SizeValue();
1700   __ gs()->movq(CpuRegister(index),
1701                 Address::Absolute(trace_buffer_index_offset, /* no_rip= */ true));
1702   __ subq(CpuRegister(index), Immediate(kNumEntriesForWallClock));
1703   __ j(kLess, slow_path->GetEntryLabel());
1704 
1705   // Update the index in the `Thread`.
1706   __ gs()->movq(Address::Absolute(trace_buffer_index_offset, /* no_rip= */ true),
1707                 CpuRegister(index));
1708   // Calculate the entry address in the buffer.
1709   // entry_addr = base_addr + sizeof(void*) * index
1710   __ gs()->movq(entry_addr,
1711                 Address::Absolute(Thread::TraceBufferPtrOffset<kX86_64PointerSize>().SizeValue(),
1712                                   /* no_rip= */ true));
1713   __ leaq(CpuRegister(entry_addr),
1714           Address(CpuRegister(entry_addr), CpuRegister(index), TIMES_8, 0));
1715 
1716   // Record method pointer and action.
1717   CpuRegister method = index;
1718   __ movq(CpuRegister(method), Address(CpuRegister(RSP), kCurrentMethodStackOffset));
1719   // Use the last two bits to encode the trace method action. For MethodEntry the action is 0,
1720   // so there is no need to set the bits since they are already 0.
1721   if (instruction->IsMethodExitHook()) {
1722     DCHECK_GE(ArtMethod::Alignment(kRuntimePointerSize), static_cast<size_t>(4));
1723     static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodEnter) == 0);
1724     static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodExit) == 1);
1725     __ orq(method, Immediate(enum_cast<int32_t>(TraceAction::kTraceMethodExit)));
1726   }
1727   __ movq(Address(entry_addr, kMethodOffsetInBytes), CpuRegister(method));
1728   // Get the timestamp. rdtsc returns the timestamp in RAX + RDX even on 64-bit architectures.
1729   __ rdtsc();
1730   __ shlq(CpuRegister(RDX), Immediate(32));
1731   __ orq(CpuRegister(RAX), CpuRegister(RDX));
1732   __ movq(Address(entry_addr, kTimestampOffsetInBytes), CpuRegister(RAX));
1733   __ Bind(slow_path->GetExitLabel());
1734 }
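// Outline of the fast path generated above (a sketch; the entry layout and constants such as
// kNumEntriesForWallClock come from the tracing runtime):
//
//   listeners byte  < kFastTraceListeners  -> no listeners, skip tracing
//   listeners byte  > kFastTraceListeners  -> slow listeners, take the slow path
//   listeners byte == kFastTraceListeners  -> record an entry inline:
//       index -= kNumEntriesForWallClock;  if (index < 0) -> slow path (buffer full)
//       entry = trace_buffer_base + 8 * index
//       entry[kMethodOffsetInBytes]    = method | trace_action   // action in the low bits
//       entry[kTimestampOffsetInBytes] = rdtsc()                 // RDX:RAX combined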
1735 
1736 void InstructionCodeGeneratorX86_64::VisitMethodEntryHook(HMethodEntryHook* instruction) {
1737   DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
1738   DCHECK(codegen_->RequiresCurrentMethod());
1739   GenerateMethodEntryExitHook(instruction);
1740 }
1741 
1742 void SetInForReturnValue(HInstruction* instr, LocationSummary* locations) {
1743   switch (instr->InputAt(0)->GetType()) {
1744     case DataType::Type::kReference:
1745     case DataType::Type::kBool:
1746     case DataType::Type::kUint8:
1747     case DataType::Type::kInt8:
1748     case DataType::Type::kUint16:
1749     case DataType::Type::kInt16:
1750     case DataType::Type::kInt32:
1751     case DataType::Type::kInt64:
1752       locations->SetInAt(0, Location::RegisterLocation(RAX));
1753       break;
1754 
1755     case DataType::Type::kFloat32:
1756     case DataType::Type::kFloat64:
1757       locations->SetInAt(0, Location::FpuRegisterLocation(XMM0));
1758       break;
1759 
1760     case DataType::Type::kVoid:
1761       locations->SetInAt(0, Location::NoLocation());
1762       break;
1763 
1764     default:
1765       LOG(FATAL) << "Unexpected return type " << instr->InputAt(0)->GetType();
1766   }
1767 }
1768 
1769 void LocationsBuilderX86_64::VisitMethodExitHook(HMethodExitHook* method_hook) {
1770   LocationSummary* locations = new (GetGraph()->GetAllocator())
1771       LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
1772   SetInForReturnValue(method_hook, locations);
1773   // We use rdtsc to record the timestamp for method profiling. rdtsc returns
1774   // two 32-bit values in EAX + EDX even on 64-bit architectures.
1775   locations->AddTemp(Location::RegisterLocation(RAX));
1776   locations->AddTemp(Location::RegisterLocation(RDX));
1777 }
1778 
1779 void InstructionCodeGeneratorX86_64::VisitMethodExitHook(HMethodExitHook* instruction) {
1780   DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
1781   DCHECK(codegen_->RequiresCurrentMethod());
1782   GenerateMethodEntryExitHook(instruction);
1783 }
1784 
1785 void CodeGeneratorX86_64::MaybeIncrementHotness(HSuspendCheck* suspend_check, bool is_frame_entry) {
1786   if (GetCompilerOptions().CountHotnessInCompiledCode()) {
1787     NearLabel overflow;
1788     Register method = kMethodRegisterArgument;
1789     if (!is_frame_entry) {
1790       CHECK(RequiresCurrentMethod());
1791       method = TMP;
1792       __ movq(CpuRegister(method), Address(CpuRegister(RSP), kCurrentMethodStackOffset));
1793     }
1794     __ cmpw(Address(CpuRegister(method), ArtMethod::HotnessCountOffset().Int32Value()),
1795             Immediate(interpreter::kNterpHotnessValue));
1796     __ j(kEqual, &overflow);
1797     __ addw(Address(CpuRegister(method), ArtMethod::HotnessCountOffset().Int32Value()),
1798             Immediate(-1));
1799     __ Bind(&overflow);
1800   }
1801 
1802   if (GetGraph()->IsCompilingBaseline() &&
1803       GetGraph()->IsUsefulOptimizing() &&
1804       !Runtime::Current()->IsAotCompiler()) {
1805     ProfilingInfo* info = GetGraph()->GetProfilingInfo();
1806     DCHECK(info != nullptr);
1807     CHECK(!HasEmptyFrame());
1808     uint64_t address = reinterpret_cast64<uint64_t>(info) +
1809         ProfilingInfo::BaselineHotnessCountOffset().Int32Value();
1810     SlowPathCode* slow_path =
1811         new (GetScopedAllocator()) CompileOptimizedSlowPathX86_64(suspend_check, address);
1812     AddSlowPath(slow_path);
1813     // Note: if the address was in the 32-bit range, we could use
1814     // Address::Absolute and avoid this movq.
1815     __ movq(CpuRegister(TMP), Immediate(address));
1816     // With multiple threads, this counter can overflow. This is OK; we will eventually
1817     // see it reach 0. Also, at this point we have no register available to inspect
1818     // the counter directly.
1819     __ addw(Address(CpuRegister(TMP), 0), Immediate(-1));
1820     __ j(kEqual, slow_path->GetEntryLabel());
1821     __ Bind(slow_path->GetExitLabel());
1822   }
1823 }
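// In pseudo-code, the baseline profiling path above behaves roughly like this sketch (the
// counter lives in the ProfilingInfo and is decremented with a 16-bit addw; the field name
// below is illustrative):
//
//   if (--profiling_info->baseline_hotness_count == 0) {
//     // CompileOptimizedSlowPathX86_64: request an optimized JIT compilation.
//   }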
1824 
1825 void CodeGeneratorX86_64::GenerateFrameEntry() {
1826   __ cfi().SetCurrentCFAOffset(kX86_64WordSize);  // return address
1827 
1828   // Check if we need to generate the clinit check. We will jump to the
1829   // resolution stub if the class is not initialized and the executing thread is
1830   // not the thread initializing it.
1831   // We do this before constructing the frame to get the correct stack trace if
1832   // an exception is thrown.
1833   if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) {
1834     NearLabel resolution;
1835     // Check if we're visibly initialized.
1836 
1837     // We don't emit a read barrier here to save on code size. We rely on the
1838     // resolution trampoline to do a suspend check before re-entering this code.
1839     __ movl(CpuRegister(TMP),
1840             Address(CpuRegister(kMethodRegisterArgument),
1841                     ArtMethod::DeclaringClassOffset().Int32Value()));
1842     __ cmpb(Address(CpuRegister(TMP), kClassStatusByteOffset),
1843             Immediate(kShiftedVisiblyInitializedValue));
1844     __ j(kAboveEqual, &frame_entry_label_);
1845 
1846     // Check if we're initializing and the thread initializing is the one
1847     // executing the code.
1848     __ cmpb(Address(CpuRegister(TMP), kClassStatusByteOffset),
1849             Immediate(kShiftedInitializingValue));
1850     __ j(kBelow, &resolution);
1851 
1852     __ movl(CpuRegister(TMP),
1853             Address(CpuRegister(TMP), mirror::Class::ClinitThreadIdOffset().Int32Value()));
1854     __ gs()->cmpl(
1855         CpuRegister(TMP),
1856         Address::Absolute(Thread::TidOffset<kX86_64PointerSize>().Int32Value(), /*no_rip=*/ true));
1857     __ j(kEqual, &frame_entry_label_);
1858     __ Bind(&resolution);
1859 
1860     // Jump to the resolution stub.
1861     ThreadOffset64 entrypoint_offset =
1862         GetThreadOffset<kX86_64PointerSize>(kQuickQuickResolutionTrampoline);
1863     __ gs()->jmp(Address::Absolute(entrypoint_offset, /*no_rip=*/ true));
1864   }
1865 
1866   __ Bind(&frame_entry_label_);
1867   bool skip_overflow_check = IsLeafMethod()
1868       && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
1869   DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
1870 
1871 
1872   if (!skip_overflow_check) {
1873     size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86_64);
1874     __ testq(CpuRegister(RAX), Address(CpuRegister(RSP), -static_cast<int32_t>(reserved_bytes)));
1875     RecordPcInfo(nullptr, 0);
1876   }
1877 
1878   if (!HasEmptyFrame()) {
1879     // Make sure the frame size isn't unreasonably large.
1880     DCHECK_LE(GetFrameSize(), GetMaximumFrameSize());
1881 
1882     for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
1883       Register reg = kCoreCalleeSaves[i];
1884       if (allocated_registers_.ContainsCoreRegister(reg)) {
1885         __ pushq(CpuRegister(reg));
1886         __ cfi().AdjustCFAOffset(kX86_64WordSize);
1887         __ cfi().RelOffset(DWARFReg(reg), 0);
1888       }
1889     }
1890 
1891     int adjust = GetFrameSize() - GetCoreSpillSize();
1892     IncreaseFrame(adjust);
1893     uint32_t xmm_spill_location = GetFpuSpillStart();
1894     size_t xmm_spill_slot_size = GetCalleePreservedFPWidth();
1895 
1896     for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
1897       if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
1898         int offset = xmm_spill_location + (xmm_spill_slot_size * i);
1899         __ movsd(Address(CpuRegister(RSP), offset), XmmRegister(kFpuCalleeSaves[i]));
1900         __ cfi().RelOffset(DWARFReg(kFpuCalleeSaves[i]), offset);
1901       }
1902     }
1903 
1904     // Save the current method if we need it. Note that we do not
1905     // do this in HCurrentMethod, as the instruction might have been removed
1906     // in the SSA graph.
1907     if (RequiresCurrentMethod()) {
1908       CHECK(!HasEmptyFrame());
1909       __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset),
1910               CpuRegister(kMethodRegisterArgument));
1911     }
1912 
1913     if (GetGraph()->HasShouldDeoptimizeFlag()) {
1914       CHECK(!HasEmptyFrame());
1915       // Initialize should_deoptimize flag to 0.
1916       __ movl(Address(CpuRegister(RSP), GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
1917     }
1918   }
1919 
1920   MaybeIncrementHotness(/* suspend_check= */ nullptr, /* is_frame_entry= */ true);
1921 }
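// For reference, a typical non-empty frame prologue produced above looks roughly like the
// following (which callee-saves get pushed/spilled depends on the register allocator, and
// the stack probe is skipped for small leaf frames):
//
//   testq rax, [rsp - reserved_bytes]      // implicit stack-overflow probe
//   pushq r15 ... pushq rbx                // used core callee-saves
//   subq  rsp, frame_size - core_spill_size
//   movsd [rsp + offset], xmm12..xmm15     // used FP callee-saves
//   movq  [rsp], rdi                       // store the current ArtMethod* if required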
1922 
1923 void CodeGeneratorX86_64::GenerateFrameExit() {
1924   __ cfi().RememberState();
1925   if (!HasEmptyFrame()) {
1926     uint32_t xmm_spill_location = GetFpuSpillStart();
1927     size_t xmm_spill_slot_size = GetCalleePreservedFPWidth();
1928     for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
1929       if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
1930         int offset = xmm_spill_location + (xmm_spill_slot_size * i);
1931         __ movsd(XmmRegister(kFpuCalleeSaves[i]), Address(CpuRegister(RSP), offset));
1932         __ cfi().Restore(DWARFReg(kFpuCalleeSaves[i]));
1933       }
1934     }
1935 
1936     int adjust = GetFrameSize() - GetCoreSpillSize();
1937     DecreaseFrame(adjust);
1938 
1939     for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
1940       Register reg = kCoreCalleeSaves[i];
1941       if (allocated_registers_.ContainsCoreRegister(reg)) {
1942         __ popq(CpuRegister(reg));
1943         __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize));
1944         __ cfi().Restore(DWARFReg(reg));
1945       }
1946     }
1947   }
1948   __ ret();
1949   __ cfi().RestoreState();
1950   __ cfi().DefCFAOffset(GetFrameSize());
1951 }
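// The matching epilogue mirrors the prologue in reverse order (sketch):
//
//   movsd xmm12..xmm15, [rsp + offset]     // restore used FP callee-saves
//   addq  rsp, frame_size - core_spill_size
//   popq  rbx ... popq r15                 // restore used core callee-saves
//   ret
//
// The CFI state is remembered before the epilogue and restored after the `ret`, so unwind
// info for code emitted afterwards still describes the full frame.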
1952 
1953 void CodeGeneratorX86_64::Bind(HBasicBlock* block) {
1954   __ Bind(GetLabelOf(block));
1955 }
1956 
1957 void CodeGeneratorX86_64::Move(Location destination, Location source) {
1958   if (source.Equals(destination)) {
1959     return;
1960   }
1961   if (destination.IsRegister()) {
1962     CpuRegister dest = destination.AsRegister<CpuRegister>();
1963     if (source.IsRegister()) {
1964       __ movq(dest, source.AsRegister<CpuRegister>());
1965     } else if (source.IsFpuRegister()) {
1966       __ movd(dest, source.AsFpuRegister<XmmRegister>());
1967     } else if (source.IsStackSlot()) {
1968       __ movl(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1969     } else if (source.IsConstant()) {
1970       HConstant* constant = source.GetConstant();
1971       if (constant->IsLongConstant()) {
1972         Load64BitValue(dest, constant->AsLongConstant()->GetValue());
1973       } else if (constant->IsDoubleConstant()) {
1974         Load64BitValue(dest, GetInt64ValueOf(constant));
1975       } else {
1976         Load32BitValue(dest, GetInt32ValueOf(constant));
1977       }
1978     } else {
1979       DCHECK(source.IsDoubleStackSlot());
1980       __ movq(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1981     }
1982   } else if (destination.IsFpuRegister()) {
1983     XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
1984     if (source.IsRegister()) {
1985       __ movd(dest, source.AsRegister<CpuRegister>());
1986     } else if (source.IsFpuRegister()) {
1987       __ movaps(dest, source.AsFpuRegister<XmmRegister>());
1988     } else if (source.IsConstant()) {
1989       HConstant* constant = source.GetConstant();
1990       int64_t value = CodeGenerator::GetInt64ValueOf(constant);
1991       if (constant->IsFloatConstant()) {
1992         Load32BitValue(dest, static_cast<int32_t>(value));
1993       } else {
1994         Load64BitValue(dest, value);
1995       }
1996     } else if (source.IsStackSlot()) {
1997       __ movss(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1998     } else {
1999       DCHECK(source.IsDoubleStackSlot());
2000       __ movsd(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
2001     }
2002   } else if (destination.IsStackSlot()) {
2003     if (source.IsRegister()) {
2004       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
2005               source.AsRegister<CpuRegister>());
2006     } else if (source.IsFpuRegister()) {
2007       __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
2008                source.AsFpuRegister<XmmRegister>());
2009     } else if (source.IsConstant()) {
2010       HConstant* constant = source.GetConstant();
2011       int32_t value = GetInt32ValueOf(constant);
2012       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
2013     } else {
2014       DCHECK(source.IsStackSlot()) << source;
2015       __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
2016       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
2017     }
2018   } else {
2019     DCHECK(destination.IsDoubleStackSlot());
2020     if (source.IsRegister()) {
2021       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
2022               source.AsRegister<CpuRegister>());
2023     } else if (source.IsFpuRegister()) {
2024       __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
2025                source.AsFpuRegister<XmmRegister>());
2026     } else if (source.IsConstant()) {
2027       HConstant* constant = source.GetConstant();
2028       DCHECK(constant->IsLongConstant() || constant->IsDoubleConstant());
2029       int64_t value = GetInt64ValueOf(constant);
2030       Store64BitValueToStack(destination, value);
2031     } else {
2032       DCHECK(source.IsDoubleStackSlot());
2033       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
2034       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
2035     }
2036   }
2037 }
2038 
2039 void CodeGeneratorX86_64::LoadFromMemoryNoReference(DataType::Type type,
2040                                                     Location dst,
2041                                                     Address src) {
2042   switch (type) {
2043     case DataType::Type::kBool:
2044     case DataType::Type::kUint8:
2045       __ movzxb(dst.AsRegister<CpuRegister>(), src);
2046       break;
2047     case DataType::Type::kInt8:
2048       __ movsxb(dst.AsRegister<CpuRegister>(), src);
2049       break;
2050     case DataType::Type::kUint16:
2051       __ movzxw(dst.AsRegister<CpuRegister>(), src);
2052       break;
2053     case DataType::Type::kInt16:
2054       __ movsxw(dst.AsRegister<CpuRegister>(), src);
2055       break;
2056     case DataType::Type::kInt32:
2057     case DataType::Type::kUint32:
2058       __ movl(dst.AsRegister<CpuRegister>(), src);
2059       break;
2060     case DataType::Type::kInt64:
2061     case DataType::Type::kUint64:
2062       __ movq(dst.AsRegister<CpuRegister>(), src);
2063       break;
2064     case DataType::Type::kFloat32:
2065       __ movss(dst.AsFpuRegister<XmmRegister>(), src);
2066       break;
2067     case DataType::Type::kFloat64:
2068       __ movsd(dst.AsFpuRegister<XmmRegister>(), src);
2069       break;
2070     case DataType::Type::kVoid:
2071     case DataType::Type::kReference:
2072       LOG(FATAL) << "Unreachable type " << type;
2073       UNREACHABLE();
2074   }
2075 }
2076 
2077 void CodeGeneratorX86_64::MoveConstant(Location location, int32_t value) {
2078   DCHECK(location.IsRegister());
2079   Load64BitValue(location.AsRegister<CpuRegister>(), static_cast<int64_t>(value));
2080 }
2081 
2082 void CodeGeneratorX86_64::MoveLocation(Location dst,
2083                                        Location src,
2084                                        [[maybe_unused]] DataType::Type dst_type) {
2085   Move(dst, src);
2086 }
2087 
2088 void CodeGeneratorX86_64::AddLocationAsTemp(Location location, LocationSummary* locations) {
2089   if (location.IsRegister()) {
2090     locations->AddTemp(location);
2091   } else {
2092     UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
2093   }
2094 }
2095 
2096 void InstructionCodeGeneratorX86_64::HandleGoto(HInstruction* got, HBasicBlock* successor) {
2097   if (successor->IsExitBlock()) {
2098     DCHECK(got->GetPrevious()->AlwaysThrows());
2099     return;  // no code needed
2100   }
2101 
2102   HBasicBlock* block = got->GetBlock();
2103   HInstruction* previous = got->GetPrevious();
2104 
2105   HLoopInformation* info = block->GetLoopInformation();
2106   if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
2107     codegen_->MaybeIncrementHotness(info->GetSuspendCheck(), /* is_frame_entry= */ false);
2108     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
2109     return;
2110   }
2111 
2112   if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
2113     GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
2114   }
2115   if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
2116     __ jmp(codegen_->GetLabelOf(successor));
2117   }
2118 }
2119 
2120 void LocationsBuilderX86_64::VisitGoto(HGoto* got) {
2121   got->SetLocations(nullptr);
2122 }
2123 
2124 void InstructionCodeGeneratorX86_64::VisitGoto(HGoto* got) {
2125   HandleGoto(got, got->GetSuccessor());
2126 }
2127 
2128 void LocationsBuilderX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
2129   try_boundary->SetLocations(nullptr);
2130 }
2131 
2132 void InstructionCodeGeneratorX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
2133   HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
2134   if (!successor->IsExitBlock()) {
2135     HandleGoto(try_boundary, successor);
2136   }
2137 }
2138 
2139 void LocationsBuilderX86_64::VisitExit(HExit* exit) {
2140   exit->SetLocations(nullptr);
2141 }
2142 
2143 void InstructionCodeGeneratorX86_64::VisitExit([[maybe_unused]] HExit* exit) {}
2144 
2145 template<class LabelType>
2146 void InstructionCodeGeneratorX86_64::GenerateFPJumps(HCondition* cond,
2147                                                      LabelType* true_label,
2148                                                      LabelType* false_label) {
2149   if (cond->IsFPConditionTrueIfNaN()) {
2150     __ j(kUnordered, true_label);
2151   } else if (cond->IsFPConditionFalseIfNaN()) {
2152     __ j(kUnordered, false_label);
2153   }
2154   __ j(X86_64FPCondition(cond->GetCondition()), true_label);
2155 }
2156 
2157 void InstructionCodeGeneratorX86_64::GenerateCompareTest(HCondition* condition) {
2158   LocationSummary* locations = condition->GetLocations();
2159 
2160   Location left = locations->InAt(0);
2161   Location right = locations->InAt(1);
2162   DataType::Type type = condition->InputAt(0)->GetType();
2163   switch (type) {
2164     case DataType::Type::kBool:
2165     case DataType::Type::kUint8:
2166     case DataType::Type::kInt8:
2167     case DataType::Type::kUint16:
2168     case DataType::Type::kInt16:
2169     case DataType::Type::kInt32:
2170     case DataType::Type::kReference: {
2171       codegen_->GenerateIntCompare(left, right);
2172       break;
2173     }
2174     case DataType::Type::kInt64: {
2175       codegen_->GenerateLongCompare(left, right);
2176       break;
2177     }
2178     case DataType::Type::kFloat32: {
2179       if (right.IsFpuRegister()) {
2180         __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
2181       } else if (right.IsConstant()) {
2182         __ ucomiss(left.AsFpuRegister<XmmRegister>(),
2183                    codegen_->LiteralFloatAddress(
2184                        right.GetConstant()->AsFloatConstant()->GetValue()));
2185       } else {
2186         DCHECK(right.IsStackSlot());
2187         __ ucomiss(left.AsFpuRegister<XmmRegister>(),
2188                    Address(CpuRegister(RSP), right.GetStackIndex()));
2189       }
2190       break;
2191     }
2192     case DataType::Type::kFloat64: {
2193       if (right.IsFpuRegister()) {
2194         __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
2195       } else if (right.IsConstant()) {
2196         __ ucomisd(left.AsFpuRegister<XmmRegister>(),
2197                    codegen_->LiteralDoubleAddress(
2198                        right.GetConstant()->AsDoubleConstant()->GetValue()));
2199       } else {
2200         DCHECK(right.IsDoubleStackSlot());
2201         __ ucomisd(left.AsFpuRegister<XmmRegister>(),
2202                    Address(CpuRegister(RSP), right.GetStackIndex()));
2203       }
2204       break;
2205     }
2206     default:
2207       LOG(FATAL) << "Unexpected condition type " << type;
2208   }
2209 }
2210 
2211 template<class LabelType>
2212 void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* condition,
2213                                                                   LabelType* true_target_in,
2214                                                                   LabelType* false_target_in) {
2215   // Generated branching requires both targets to be explicit. If either of the
2216   // targets is nullptr (i.e. a fallthrough), use and bind `fallthrough_target` instead.
2217   LabelType fallthrough_target;
2218   LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
2219   LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
2220 
2221   // Generate the comparison to set the CC.
2222   GenerateCompareTest(condition);
2223 
2224   // Now generate the correct jump(s).
2225   DataType::Type type = condition->InputAt(0)->GetType();
2226   switch (type) {
2227     case DataType::Type::kInt64: {
2228       __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
2229       break;
2230     }
2231     case DataType::Type::kFloat32: {
2232       GenerateFPJumps(condition, true_target, false_target);
2233       break;
2234     }
2235     case DataType::Type::kFloat64: {
2236       GenerateFPJumps(condition, true_target, false_target);
2237       break;
2238     }
2239     default:
2240       LOG(FATAL) << "Unexpected condition type " << type;
2241   }
2242 
2243   if (false_target != &fallthrough_target) {
2244     __ jmp(false_target);
2245   }
2246 
2247   if (fallthrough_target.IsLinked()) {
2248     __ Bind(&fallthrough_target);
2249   }
2250 }
2251 
2252 static bool AreEflagsSetFrom(HInstruction* cond,
2253                              HInstruction* branch,
2254                              const CompilerOptions& compiler_options) {
2255   // Moves may affect the eflags register (move zero uses xorl), so the EFLAGS
2256   // are set only strictly before `branch`. We can't use the eflags on long
2257   // conditions if they are materialized due to the complex branching.
2258   return cond->IsCondition() &&
2259          cond->GetNext() == branch &&
2260          !DataType::IsFloatingPointType(cond->InputAt(0)->GetType()) &&
2261          !(cond->GetBlock()->GetGraph()->IsCompilingBaseline() &&
2262            compiler_options.ProfileBranches());
2263 }
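// Example of the situation this check enables (illustrative only):
//
//   cond = (a < b);     // HCondition (HLessThan) emits a cmpl that sets EFLAGS
//   if (cond) { ... }   // HIf immediately follows, so the flags are still valid
//
// GenerateTestAndBranch below can then branch on the existing flags instead of materializing
// the condition into a register and re-testing it against zero.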
2264 
2265 template<class LabelType>
2266 void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruction,
2267                                                            size_t condition_input_index,
2268                                                            LabelType* true_target,
2269                                                            LabelType* false_target) {
2270   HInstruction* cond = instruction->InputAt(condition_input_index);
2271 
2272   if (true_target == nullptr && false_target == nullptr) {
2273     // Nothing to do. The code always falls through.
2274     return;
2275   } else if (cond->IsIntConstant()) {
2276     // Constant condition, statically compared against "true" (integer value 1).
2277     if (cond->AsIntConstant()->IsTrue()) {
2278       if (true_target != nullptr) {
2279         __ jmp(true_target);
2280       }
2281     } else {
2282       DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
2283       if (false_target != nullptr) {
2284         __ jmp(false_target);
2285       }
2286     }
2287     return;
2288   }
2289 
2290   // The following code generates these patterns:
2291   //  (1) true_target == nullptr && false_target != nullptr
2292   //        - opposite condition true => branch to false_target
2293   //  (2) true_target != nullptr && false_target == nullptr
2294   //        - condition true => branch to true_target
2295   //  (3) true_target != nullptr && false_target != nullptr
2296   //        - condition true => branch to true_target
2297   //        - branch to false_target
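  // For example, pattern (3) with a non-materialized integer `a < b` emits roughly:
  //   cmp a, b          (via GenerateIntCompare)
  //   jl  true_target
  //   jmp false_target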
2298   if (IsBooleanValueOrMaterializedCondition(cond)) {
2299     if (AreEflagsSetFrom(cond, instruction, codegen_->GetCompilerOptions())) {
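      // The flags are still those produced by `cond` itself (it immediately precedes
      // this branch), so jump on them directly without re-testing.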
2300       if (true_target == nullptr) {
2301         __ j(X86_64IntegerCondition(cond->AsCondition()->GetOppositeCondition()), false_target);
2302       } else {
2303         __ j(X86_64IntegerCondition(cond->AsCondition()->GetCondition()), true_target);
2304       }
2305     } else {
2306       // Materialized condition, compare against 0.
2307       Location lhs = instruction->GetLocations()->InAt(condition_input_index);
2308       if (lhs.IsRegister()) {
2309         __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
2310       } else {
2311         __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0));
2312       }
2313       if (true_target == nullptr) {
2314         __ j(kEqual, false_target);
2315       } else {
2316         __ j(kNotEqual, true_target);
2317       }
2318     }
2319   } else {
2320     // Condition has not been materialized, use its inputs as the
2321     // comparison and its condition as the branch condition.
2322     HCondition* condition = cond->AsCondition();
2323 
2324     // If this is a long or FP comparison that has been folded into
2325     // the HCondition, generate the comparison directly.
2326     DataType::Type type = condition->InputAt(0)->GetType();
2327     if (type == DataType::Type::kInt64 || DataType::IsFloatingPointType(type)) {
2328       GenerateCompareTestAndBranch(condition, true_target, false_target);
2329       return;
2330     }
2331 
2332     Location lhs = condition->GetLocations()->InAt(0);
2333     Location rhs = condition->GetLocations()->InAt(1);
2334     codegen_->GenerateIntCompare(lhs, rhs);
2335     if (true_target == nullptr) {
2336       __ j(X86_64IntegerCondition(condition->GetOppositeCondition()), false_target);
2337     } else {
2338       __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
2339     }
2340   }
2341 
2342   // If neither branch falls through (case 3), the conditional branch to `true_target`
2343   // was already emitted (case 2) and we need to emit a jump to `false_target`.
2344   if (true_target != nullptr && false_target != nullptr) {
2345     __ jmp(false_target);
2346   }
2347 }
2348 
2349 void LocationsBuilderX86_64::VisitIf(HIf* if_instr) {
2350   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
2351   if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
2352     if (GetGraph()->IsCompilingBaseline() &&
2353         codegen_->GetCompilerOptions().ProfileBranches() &&
2354         !Runtime::Current()->IsAotCompiler()) {
2355       locations->SetInAt(0, Location::RequiresRegister());
2356       locations->AddTemp(Location::RequiresRegister());
2357     } else {
2358       locations->SetInAt(0, Location::Any());
2359     }
2360   }
2361 }
2362 
2363 void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
2364   HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
2365   HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
2366   Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
2367       nullptr : codegen_->GetLabelOf(true_successor);
2368   Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
2369       nullptr : codegen_->GetLabelOf(false_successor);
2370   if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
2371     if (GetGraph()->IsCompilingBaseline() &&
2372         codegen_->GetCompilerOptions().ProfileBranches() &&
2373         !Runtime::Current()->IsAotCompiler()) {
2374       DCHECK(if_instr->InputAt(0)->IsCondition());
2375       CpuRegister temp = if_instr->GetLocations()->GetTemp(0).AsRegister<CpuRegister>();
2376       ProfilingInfo* info = GetGraph()->GetProfilingInfo();
2377       DCHECK(info != nullptr);
2378       BranchCache* cache = info->GetBranchCache(if_instr->GetDexPc());
2379       // Currently, not all If branches are profiled.
2380       if (cache != nullptr) {
2381         uint64_t address =
2382             reinterpret_cast64<uint64_t>(cache) + BranchCache::FalseOffset().Int32Value();
2383         static_assert(
2384             BranchCache::TrueOffset().Int32Value() - BranchCache::FalseOffset().Int32Value() == 2,
2385             "Unexpected offsets for BranchCache");
2386         NearLabel done;
2387         Location lhs = if_instr->GetLocations()->InAt(0);
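        // `lhs` holds the condition value (0 or 1); scaled by 2 it selects the 16-bit
        // false or true counter relative to BranchCache::FalseOffset().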
2388         __ movq(CpuRegister(TMP), Immediate(address));
2389         __ movzxw(temp, Address(CpuRegister(TMP), lhs.AsRegister<CpuRegister>(), TIMES_2, 0));
2390         __ addw(temp, Immediate(1));
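        // If the 16-bit counter wrapped around to zero, skip the store so the count
        // saturates at 0xFFFF instead of resetting.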
2391         __ j(kZero, &done);
2392         __ movw(Address(CpuRegister(TMP), lhs.AsRegister<CpuRegister>(), TIMES_2, 0), temp);
2393         __ Bind(&done);
2394       }
2395     }
2396   }
2397   GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
2398 }
2399 
2400 void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
2401   LocationSummary* locations = new (GetGraph()->GetAllocator())
2402       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
2403   InvokeRuntimeCallingConvention calling_convention;
2404   RegisterSet caller_saves = RegisterSet::Empty();
2405   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
2406   locations->SetCustomSlowPathCallerSaves(caller_saves);
2407   if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
2408     locations->SetInAt(0, Location::Any());
2409   }
2410 }
2411 
2412 void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
2413   SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86_64>(deoptimize);
2414   GenerateTestAndBranch<Label>(deoptimize,
2415                                /* condition_input_index= */ 0,
2416                                slow_path->GetEntryLabel(),
2417                                /* false_target= */ nullptr);
2418 }
2419 
2420 void LocationsBuilderX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
2421   LocationSummary* locations = new (GetGraph()->GetAllocator())
2422       LocationSummary(flag, LocationSummary::kNoCall);
2423   locations->SetOut(Location::RequiresRegister());
2424 }
2425 
2426 void InstructionCodeGeneratorX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
2427   __ movl(flag->GetLocations()->Out().AsRegister<CpuRegister>(),
2428           Address(CpuRegister(RSP), codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
2429 }
2430 
2431 static bool SelectCanUseCMOV(HSelect* select) {
2432   // There are no conditional move instructions for XMMs.
2433   if (DataType::IsFloatingPointType(select->GetType())) {
2434     return false;
2435   }
2436 
2437   // A FP condition doesn't generate the single CC that we need.
2438   HInstruction* condition = select->GetCondition();
2439   if (condition->IsCondition() &&
2440       DataType::IsFloatingPointType(condition->InputAt(0)->GetType())) {
2441     return false;
2442   }
2443 
2444   // We can generate a CMOV for this Select.
2445   return true;
2446 }
2447 
2448 void LocationsBuilderX86_64::VisitSelect(HSelect* select) {
2449   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select);
2450   if (DataType::IsFloatingPointType(select->GetType())) {
2451     locations->SetInAt(0, Location::RequiresFpuRegister());
2452     locations->SetInAt(1, Location::Any());
2453   } else {
2454     locations->SetInAt(0, Location::RequiresRegister());
2455     if (SelectCanUseCMOV(select)) {
2456       if (select->InputAt(1)->IsConstant()) {
2457         locations->SetInAt(1, Location::RequiresRegister());
2458       } else {
2459         locations->SetInAt(1, Location::Any());
2460       }
2461     } else {
2462       locations->SetInAt(1, Location::Any());
2463     }
2464   }
2465   if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
2466     locations->SetInAt(2, Location::RequiresRegister());
2467   }
2468   locations->SetOut(Location::SameAsFirstInput());
2469 }
2470 
2471 void InstructionCodeGeneratorX86_64::VisitSelect(HSelect* select) {
2472   LocationSummary* locations = select->GetLocations();
2473   if (SelectCanUseCMOV(select)) {
2474     // If both the condition and the source types are integer, we can generate
2475     // a CMOV to implement Select.
2476     CpuRegister value_false = locations->InAt(0).AsRegister<CpuRegister>();
2477     Location value_true_loc = locations->InAt(1);
2478     DCHECK(locations->InAt(0).Equals(locations->Out()));
2479 
2480     HInstruction* select_condition = select->GetCondition();
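    // Default to kNotEqual: the condition value is tested with `testl` below, so the
    // CMOV selects value_true when that value is non-zero.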
2481     Condition cond = kNotEqual;
2482 
2483     // Figure out how to test the 'condition'.
2484     if (select_condition->IsCondition()) {
2485       HCondition* condition = select_condition->AsCondition();
2486       if (!condition->IsEmittedAtUseSite()) {
2487         // This was a previously materialized condition.
2488         // Can we use the existing condition code?
2489         if (AreEflagsSetFrom(condition, select, codegen_->GetCompilerOptions())) {
2490           // Materialization was the previous instruction.  Condition codes are right.
2491           cond = X86_64IntegerCondition(condition->GetCondition());
2492         } else {
2493           // No, we have to recreate the condition code.
2494           CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
2495           __ testl(cond_reg, cond_reg);
2496         }
2497       } else {
2498         GenerateCompareTest(condition);
2499         cond = X86_64IntegerCondition(condition->GetCondition());
2500       }
2501     } else {
2502       // Must be a Boolean condition, which needs to be compared to 0.
2503       CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
2504       __ testl(cond_reg, cond_reg);
2505     }
2506 
2507     // If the condition is true, overwrite the output, which already contains false.
2508     // Generate the correctly-sized CMOV.
2509     bool is_64_bit = DataType::Is64BitType(select->GetType());
2510     if (value_true_loc.IsRegister()) {
2511       __ cmov(cond, value_false, value_true_loc.AsRegister<CpuRegister>(), is_64_bit);
2512     } else {
2513       __ cmov(cond,
2514               value_false,
2515               Address(CpuRegister(RSP), value_true_loc.GetStackIndex()), is_64_bit);
2516     }
2517   } else {
2518     NearLabel false_target;
2519     GenerateTestAndBranch<NearLabel>(select,
2520                                      /* condition_input_index= */ 2,
2521                                      /* true_target= */ nullptr,
2522                                      &false_target);
2523     codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
2524     __ Bind(&false_target);
2525   }
2526 }
2527 
2528 void LocationsBuilderX86_64::VisitNop(HNop* nop) {
2529   new (GetGraph()->GetAllocator()) LocationSummary(nop);
2530 }
2531 
2532 void InstructionCodeGeneratorX86_64::VisitNop(HNop*) {
2533   // The environment recording already happened in CodeGenerator::Compile.
2534 }
2535 
2536 void CodeGeneratorX86_64::IncreaseFrame(size_t adjustment) {
2537   __ subq(CpuRegister(RSP), Immediate(adjustment));
2538   __ cfi().AdjustCFAOffset(adjustment);
2539 }
2540 
2541 void CodeGeneratorX86_64::DecreaseFrame(size_t adjustment) {
2542   __ addq(CpuRegister(RSP), Immediate(adjustment));
2543   __ cfi().AdjustCFAOffset(-adjustment);
2544 }
2545 
2546 void CodeGeneratorX86_64::GenerateNop() {
2547   __ nop();
2548 }
2549 
2550 void LocationsBuilderX86_64::HandleCondition(HCondition* cond) {
2551   LocationSummary* locations =
2552       new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall);
2553   // Handle the long/FP comparisons made in instruction simplification.
2554   switch (cond->InputAt(0)->GetType()) {
2555     case DataType::Type::kInt64:
2556       locations->SetInAt(0, Location::RequiresRegister());
2557       locations->SetInAt(1, Location::Any());
2558       break;
2559     case DataType::Type::kFloat32:
2560     case DataType::Type::kFloat64:
2561       locations->SetInAt(0, Location::RequiresFpuRegister());
2562       locations->SetInAt(1, Location::Any());
2563       break;
2564     default:
2565       locations->SetInAt(0, Location::RequiresRegister());
2566       locations->SetInAt(1, Location::Any());
2567       break;
2568   }
2569   if (!cond->IsEmittedAtUseSite()) {
2570     locations->SetOut(Location::RequiresRegister());
2571   }
2572 }
2573 
2574 void InstructionCodeGeneratorX86_64::HandleCondition(HCondition* cond) {
2575   if (cond->IsEmittedAtUseSite()) {
2576     return;
2577   }
2578 
2579   LocationSummary* locations = cond->GetLocations();
2580   Location lhs = locations->InAt(0);
2581   Location rhs = locations->InAt(1);
2582   CpuRegister reg = locations->Out().AsRegister<CpuRegister>();
2583   NearLabel true_label, false_label;
2584 
2585   switch (cond->InputAt(0)->GetType()) {
2586     default:
2587       // Integer case.
2588 
2589       // Clear output register: setcc only sets the low byte.
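      // Note: the clear must happen before the compare below, since xorl itself
      // modifies EFLAGS.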
2590       __ xorl(reg, reg);
2591 
2592       codegen_->GenerateIntCompare(lhs, rhs);
2593       __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
2594       return;
2595     case DataType::Type::kInt64:
2596       // Clear output register: setcc only sets the low byte.
2597       __ xorl(reg, reg);
2598 
2599       codegen_->GenerateLongCompare(lhs, rhs);
2600       __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
2601       return;
2602     case DataType::Type::kFloat32: {
2603       XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
2604       if (rhs.IsConstant()) {
2605         float value = rhs.GetConstant()->AsFloatConstant()->GetValue();
2606         __ ucomiss(lhs_reg, codegen_->LiteralFloatAddress(value));
2607       } else if (rhs.IsStackSlot()) {
2608         __ ucomiss(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
2609       } else {
2610         __ ucomiss(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
2611       }
2612       GenerateFPJumps(cond, &true_label, &false_label);
2613       break;
2614     }
2615     case DataType::Type::kFloat64: {
2616       XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
2617       if (rhs.IsConstant()) {
2618         double value = rhs.GetConstant()->AsDoubleConstant()->GetValue();
2619         __ ucomisd(lhs_reg, codegen_->LiteralDoubleAddress(value));
2620       } else if (rhs.IsDoubleStackSlot()) {
2621         __ ucomisd(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
2622       } else {
2623         __ ucomisd(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
2624       }
2625       GenerateFPJumps(cond, &true_label, &false_label);
2626       break;
2627     }
2628   }
2629 
2630   // Convert the jumps into the result.
2631   NearLabel done_label;
2632 
2633   // False case: result = 0.
2634   __ Bind(&false_label);
2635   __ xorl(reg, reg);
2636   __ jmp(&done_label);
2637 
2638   // True case: result = 1.
2639   __ Bind(&true_label);
2640   __ movl(reg, Immediate(1));
2641   __ Bind(&done_label);
2642 }
2643 
2644 void LocationsBuilderX86_64::VisitEqual(HEqual* comp) {
2645   HandleCondition(comp);
2646 }
2647 
2648 void InstructionCodeGeneratorX86_64::VisitEqual(HEqual* comp) {
2649   HandleCondition(comp);
2650 }
2651 
2652 void LocationsBuilderX86_64::VisitNotEqual(HNotEqual* comp) {
2653   HandleCondition(comp);
2654 }
2655 
2656 void InstructionCodeGeneratorX86_64::VisitNotEqual(HNotEqual* comp) {
2657   HandleCondition(comp);
2658 }
2659 
2660 void LocationsBuilderX86_64::VisitLessThan(HLessThan* comp) {
2661   HandleCondition(comp);
2662 }
2663 
2664 void InstructionCodeGeneratorX86_64::VisitLessThan(HLessThan* comp) {
2665   HandleCondition(comp);
2666 }
2667 
2668 void LocationsBuilderX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2669   HandleCondition(comp);
2670 }
2671 
2672 void InstructionCodeGeneratorX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2673   HandleCondition(comp);
2674 }
2675 
2676 void LocationsBuilderX86_64::VisitGreaterThan(HGreaterThan* comp) {
2677   HandleCondition(comp);
2678 }
2679 
2680 void InstructionCodeGeneratorX86_64::VisitGreaterThan(HGreaterThan* comp) {
2681   HandleCondition(comp);
2682 }
2683 
2684 void LocationsBuilderX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2685   HandleCondition(comp);
2686 }
2687 
2688 void InstructionCodeGeneratorX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2689   HandleCondition(comp);
2690 }
2691 
2692 void LocationsBuilderX86_64::VisitBelow(HBelow* comp) {
2693   HandleCondition(comp);
2694 }
2695 
2696 void InstructionCodeGeneratorX86_64::VisitBelow(HBelow* comp) {
2697   HandleCondition(comp);
2698 }
2699 
2700 void LocationsBuilderX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
2701   HandleCondition(comp);
2702 }
2703 
2704 void InstructionCodeGeneratorX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
2705   HandleCondition(comp);
2706 }
2707 
2708 void LocationsBuilderX86_64::VisitAbove(HAbove* comp) {
2709   HandleCondition(comp);
2710 }
2711 
2712 void InstructionCodeGeneratorX86_64::VisitAbove(HAbove* comp) {
2713   HandleCondition(comp);
2714 }
2715 
2716 void LocationsBuilderX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
2717   HandleCondition(comp);
2718 }
2719 
2720 void InstructionCodeGeneratorX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
2721   HandleCondition(comp);
2722 }
2723 
2724 void LocationsBuilderX86_64::VisitCompare(HCompare* compare) {
2725   LocationSummary* locations =
2726       new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
2727   switch (compare->InputAt(0)->GetType()) {
2728     case DataType::Type::kBool:
2729     case DataType::Type::kUint8:
2730     case DataType::Type::kInt8:
2731     case DataType::Type::kUint16:
2732     case DataType::Type::kInt16:
2733     case DataType::Type::kInt32:
2734     case DataType::Type::kInt64: {
2735       locations->SetInAt(0, Location::RequiresRegister());
2736       locations->SetInAt(1, Location::Any());
2737       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2738       break;
2739     }
2740     case DataType::Type::kFloat32:
2741     case DataType::Type::kFloat64: {
2742       locations->SetInAt(0, Location::RequiresFpuRegister());
2743       locations->SetInAt(1, Location::Any());
2744       locations->SetOut(Location::RequiresRegister());
2745       break;
2746     }
2747     default:
2748       LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
2749   }
2750 }
2751 
2752 void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) {
2753   LocationSummary* locations = compare->GetLocations();
2754   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
2755   Location left = locations->InAt(0);
2756   Location right = locations->InAt(1);
2757 
2758   NearLabel less, greater, done;
2759   DataType::Type type = compare->InputAt(0)->GetType();
2760   Condition less_cond = kLess;
2761 
2762   switch (type) {
2763     case DataType::Type::kBool:
2764     case DataType::Type::kUint8:
2765     case DataType::Type::kInt8:
2766     case DataType::Type::kUint16:
2767     case DataType::Type::kInt16:
2768     case DataType::Type::kInt32: {
2769       codegen_->GenerateIntCompare(left, right);
2770       break;
2771     }
2772     case DataType::Type::kInt64: {
2773       codegen_->GenerateLongCompare(left, right);
2774       break;
2775     }
2776     case DataType::Type::kFloat32: {
2777       XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
2778       if (right.IsConstant()) {
2779         float value = right.GetConstant()->AsFloatConstant()->GetValue();
2780         __ ucomiss(left_reg, codegen_->LiteralFloatAddress(value));
2781       } else if (right.IsStackSlot()) {
2782         __ ucomiss(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
2783       } else {
2784         __ ucomiss(left_reg, right.AsFpuRegister<XmmRegister>());
2785       }
2786       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
2787       less_cond = kBelow;  //  ucomis{s,d} sets CF
2788       break;
2789     }
2790     case DataType::Type::kFloat64: {
2791       XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
2792       if (right.IsConstant()) {
2793         double value = right.GetConstant()->AsDoubleConstant()->GetValue();
2794         __ ucomisd(left_reg, codegen_->LiteralDoubleAddress(value));
2795       } else if (right.IsDoubleStackSlot()) {
2796         __ ucomisd(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
2797       } else {
2798         __ ucomisd(left_reg, right.AsFpuRegister<XmmRegister>());
2799       }
2800       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
2801       less_cond = kBelow;  //  ucomis{s,d} sets CF
2802       break;
2803     }
2804     default:
2805       LOG(FATAL) << "Unexpected compare type " << type;
2806   }
2807 
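  // Materialize the three-way result from the flags:
  // equal -> 0, `less_cond` -> -1, otherwise -> 1.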
2808   __ movl(out, Immediate(0));
2809   __ j(kEqual, &done);
2810   __ j(less_cond, &less);
2811 
2812   __ Bind(&greater);
2813   __ movl(out, Immediate(1));
2814   __ jmp(&done);
2815 
2816   __ Bind(&less);
2817   __ movl(out, Immediate(-1));
2818 
2819   __ Bind(&done);
2820 }
2821 
2822 void LocationsBuilderX86_64::VisitIntConstant(HIntConstant* constant) {
2823   LocationSummary* locations =
2824       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2825   locations->SetOut(Location::ConstantLocation(constant));
2826 }
2827 
2828 void InstructionCodeGeneratorX86_64::VisitIntConstant([[maybe_unused]] HIntConstant* constant) {
2829   // Will be generated at use site.
2830 }
2831 
2832 void LocationsBuilderX86_64::VisitNullConstant(HNullConstant* constant) {
2833   LocationSummary* locations =
2834       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2835   locations->SetOut(Location::ConstantLocation(constant));
2836 }
2837 
2838 void InstructionCodeGeneratorX86_64::VisitNullConstant([[maybe_unused]] HNullConstant* constant) {
2839   // Will be generated at use site.
2840 }
2841 
2842 void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) {
2843   LocationSummary* locations =
2844       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2845   locations->SetOut(Location::ConstantLocation(constant));
2846 }
2847 
2848 void InstructionCodeGeneratorX86_64::VisitLongConstant([[maybe_unused]] HLongConstant* constant) {
2849   // Will be generated at use site.
2850 }
2851 
2852 void LocationsBuilderX86_64::VisitFloatConstant(HFloatConstant* constant) {
2853   LocationSummary* locations =
2854       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2855   locations->SetOut(Location::ConstantLocation(constant));
2856 }
2857 
2858 void InstructionCodeGeneratorX86_64::VisitFloatConstant([[maybe_unused]] HFloatConstant* constant) {
2859   // Will be generated at use site.
2860 }
2861 
2862 void LocationsBuilderX86_64::VisitDoubleConstant(HDoubleConstant* constant) {
2863   LocationSummary* locations =
2864       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2865   locations->SetOut(Location::ConstantLocation(constant));
2866 }
2867 
2868 void InstructionCodeGeneratorX86_64::VisitDoubleConstant(
2869     [[maybe_unused]] HDoubleConstant* constant) {
2870   // Will be generated at use site.
2871 }
2872 
2873 void LocationsBuilderX86_64::VisitConstructorFence(HConstructorFence* constructor_fence) {
2874   constructor_fence->SetLocations(nullptr);
2875 }
2876 
2877 void InstructionCodeGeneratorX86_64::VisitConstructorFence(
2878     [[maybe_unused]] HConstructorFence* constructor_fence) {
2879   codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2880 }
2881 
2882 void LocationsBuilderX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2883   memory_barrier->SetLocations(nullptr);
2884 }
2885 
2886 void InstructionCodeGeneratorX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2887   codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
2888 }
2889 
2890 void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) {
2891   ret->SetLocations(nullptr);
2892 }
2893 
2894 void InstructionCodeGeneratorX86_64::VisitReturnVoid([[maybe_unused]] HReturnVoid* ret) {
2895   codegen_->GenerateFrameExit();
2896 }
2897 
2898 void LocationsBuilderX86_64::VisitReturn(HReturn* ret) {
2899   LocationSummary* locations =
2900       new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall);
2901   SetInForReturnValue(ret, locations);
2902 }
2903 
2904 void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) {
2905   switch (ret->InputAt(0)->GetType()) {
2906     case DataType::Type::kReference:
2907     case DataType::Type::kBool:
2908     case DataType::Type::kUint8:
2909     case DataType::Type::kInt8:
2910     case DataType::Type::kUint16:
2911     case DataType::Type::kInt16:
2912     case DataType::Type::kInt32:
2913     case DataType::Type::kInt64:
2914       DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<CpuRegister>().AsRegister(), RAX);
2915       break;
2916 
2917     case DataType::Type::kFloat32: {
2918       DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(),
2919                 XMM0);
2920       // To simplify callers of an OSR method, we put the return value in both
2921       // floating point and core register.
2922       if (GetGraph()->IsCompilingOsr()) {
2923         __ movd(CpuRegister(RAX), XmmRegister(XMM0), /* is64bit= */ false);
2924       }
2925       break;
2926     }
2927     case DataType::Type::kFloat64: {
2928       DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(),
2929                 XMM0);
2930       // To simplify callers of an OSR method, we put the return value in both
2931       // floating point and core register.
2932       if (GetGraph()->IsCompilingOsr()) {
2933         __ movd(CpuRegister(RAX), XmmRegister(XMM0), /* is64bit= */ true);
2934       }
2935       break;
2936     }
2937 
2938     default:
2939       LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
2940   }
2941   codegen_->GenerateFrameExit();
2942 }
2943 
2944 Location InvokeDexCallingConventionVisitorX86_64::GetReturnLocation(DataType::Type type) const {
2945   switch (type) {
2946     case DataType::Type::kReference:
2947     case DataType::Type::kBool:
2948     case DataType::Type::kUint8:
2949     case DataType::Type::kInt8:
2950     case DataType::Type::kUint16:
2951     case DataType::Type::kInt16:
2952     case DataType::Type::kUint32:
2953     case DataType::Type::kInt32:
2954     case DataType::Type::kUint64:
2955     case DataType::Type::kInt64:
2956       return Location::RegisterLocation(RAX);
2957 
2958     case DataType::Type::kVoid:
2959       return Location::NoLocation();
2960 
2961     case DataType::Type::kFloat64:
2962     case DataType::Type::kFloat32:
2963       return Location::FpuRegisterLocation(XMM0);
2964   }
2965 }
2966 
2967 Location InvokeDexCallingConventionVisitorX86_64::GetMethodLocation() const {
2968   return Location::RegisterLocation(kMethodRegisterArgument);
2969 }
2970 
2971 Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(DataType::Type type) {
2972   switch (type) {
2973     case DataType::Type::kReference:
2974     case DataType::Type::kBool:
2975     case DataType::Type::kUint8:
2976     case DataType::Type::kInt8:
2977     case DataType::Type::kUint16:
2978     case DataType::Type::kInt16:
2979     case DataType::Type::kInt32: {
2980       uint32_t index = gp_index_++;
2981       stack_index_++;
2982       if (index < calling_convention.GetNumberOfRegisters()) {
2983         return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
2984       } else {
2985         return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
2986       }
2987     }
2988 
2989     case DataType::Type::kInt64: {
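      // A 64-bit argument consumes a single GPR when one is available; otherwise it
      // occupies a double (two-index) stack slot.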
2990       uint32_t index = gp_index_;
2991       stack_index_ += 2;
2992       if (index < calling_convention.GetNumberOfRegisters()) {
2993         gp_index_ += 1;
2994         return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
2995       } else {
2996         gp_index_ += 2;
2997         return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
2998       }
2999     }
3000 
3001     case DataType::Type::kFloat32: {
3002       uint32_t index = float_index_++;
3003       stack_index_++;
3004       if (index < calling_convention.GetNumberOfFpuRegisters()) {
3005         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
3006       } else {
3007         return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
3008       }
3009     }
3010 
3011     case DataType::Type::kFloat64: {
3012       uint32_t index = float_index_++;
3013       stack_index_ += 2;
3014       if (index < calling_convention.GetNumberOfFpuRegisters()) {
3015         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
3016       } else {
3017         return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
3018       }
3019     }
3020 
3021     case DataType::Type::kUint32:
3022     case DataType::Type::kUint64:
3023     case DataType::Type::kVoid:
3024       LOG(FATAL) << "Unexpected parameter type " << type;
3025       UNREACHABLE();
3026   }
3027   return Location::NoLocation();
3028 }
3029 
3030 Location CriticalNativeCallingConventionVisitorX86_64::GetNextLocation(DataType::Type type) {
3031   DCHECK_NE(type, DataType::Type::kReference);
3032 
3033   Location location = Location::NoLocation();
3034   if (DataType::IsFloatingPointType(type)) {
3035     if (fpr_index_ < kParameterFloatRegistersLength) {
3036       location = Location::FpuRegisterLocation(kParameterFloatRegisters[fpr_index_]);
3037       ++fpr_index_;
3038     }
3039   } else {
3040     // Native ABI uses the same registers as managed, except that the method register RDI
3041     // is a normal argument.
3042     if (gpr_index_ < 1u + kParameterCoreRegistersLength) {
3043       location = Location::RegisterLocation(
3044           gpr_index_ == 0u ? RDI : kParameterCoreRegisters[gpr_index_ - 1u]);
3045       ++gpr_index_;
3046     }
3047   }
3048   if (location.IsInvalid()) {
3049     if (DataType::Is64BitType(type)) {
3050       location = Location::DoubleStackSlot(stack_offset_);
3051     } else {
3052       location = Location::StackSlot(stack_offset_);
3053     }
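    // Native ABI stack slots are pointer-sized (8 bytes on x86-64), so the offset
    // advances by kFramePointerSize regardless of the argument type.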
3054     stack_offset_ += kFramePointerSize;
3055 
3056     if (for_register_allocation_) {
3057       location = Location::Any();
3058     }
3059   }
3060   return location;
3061 }
3062 
3063 Location CriticalNativeCallingConventionVisitorX86_64::GetReturnLocation(DataType::Type type)
3064     const {
3065   // We perform conversion to the managed ABI return register after the call if needed.
3066   InvokeDexCallingConventionVisitorX86_64 dex_calling_convention;
3067   return dex_calling_convention.GetReturnLocation(type);
3068 }
3069 
3070 Location CriticalNativeCallingConventionVisitorX86_64::GetMethodLocation() const {
3071   // Pass the method in the hidden argument RAX.
3072   return Location::RegisterLocation(RAX);
3073 }
3074 
3075 void LocationsBuilderX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
3076   // The trampoline uses the same calling convention as dex calling conventions,
3077   // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
3078   // the method_idx.
3079   HandleInvoke(invoke);
3080 }
3081 
3082 void InstructionCodeGeneratorX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
3083   codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
3084 }
3085 
3086 void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
3087   // Explicit clinit checks triggered by static invokes must have been pruned by
3088   // art::PrepareForRegisterAllocation.
3089   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
3090 
3091   IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
3092   if (intrinsic.TryDispatch(invoke)) {
3093     return;
3094   }
3095 
3096   if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
3097     CriticalNativeCallingConventionVisitorX86_64 calling_convention_visitor(
3098         /*for_register_allocation=*/ true);
3099     CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
3100     CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(invoke->GetLocations());
3101   } else {
3102     HandleInvoke(invoke);
3103   }
3104 }
3105 
3106 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
3107   if (invoke->GetLocations()->Intrinsified()) {
3108     IntrinsicCodeGeneratorX86_64 intrinsic(codegen);
3109     intrinsic.Dispatch(invoke);
3110     return true;
3111   }
3112   return false;
3113 }
3114 
3115 void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
3116   // Explicit clinit checks triggered by static invokes must have been pruned by
3117   // art::PrepareForRegisterAllocation.
3118   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
3119 
3120   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
3121     return;
3122   }
3123 
3124   LocationSummary* locations = invoke->GetLocations();
3125   codegen_->GenerateStaticOrDirectCall(
3126       invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
3127 }
3128 
3129 void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) {
3130   InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
3131   CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
3132 }
3133 
3134 void LocationsBuilderX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
3135   IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
3136   if (intrinsic.TryDispatch(invoke)) {
3137     return;
3138   }
3139 
3140   HandleInvoke(invoke);
3141 }
3142 
3143 void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
3144   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
3145     return;
3146   }
3147 
3148   codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
3149   DCHECK(!codegen_->IsLeafMethod());
3150 }
3151 
3152 void LocationsBuilderX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
3153   HandleInvoke(invoke);
3154   // Add the hidden argument.
3155   if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
3156     invoke->GetLocations()->SetInAt(invoke->GetNumberOfArguments() - 1,
3157                                     Location::RegisterLocation(RAX));
3158   }
3159   invoke->GetLocations()->AddTemp(Location::RegisterLocation(RAX));
3160 }
3161 
3162 void CodeGeneratorX86_64::MaybeGenerateInlineCacheCheck(HInstruction* instruction,
3163                                                         CpuRegister klass) {
3164   DCHECK_EQ(RDI, klass.AsRegister());
3165   if (ProfilingInfoBuilder::IsInlineCacheUseful(instruction->AsInvoke(), this)) {
3166     ProfilingInfo* info = GetGraph()->GetProfilingInfo();
3167     DCHECK(info != nullptr);
3168     InlineCache* cache = ProfilingInfoBuilder::GetInlineCache(
3169         info, GetCompilerOptions(), instruction->AsInvoke());
3170     if (cache != nullptr) {
3171       uint64_t address = reinterpret_cast64<uint64_t>(cache);
3172       NearLabel done;
3173       __ movq(CpuRegister(TMP), Immediate(address));
3174       // Fast path for a monomorphic cache.
3175       __ cmpl(Address(CpuRegister(TMP), InlineCache::ClassesOffset().Int32Value()), klass);
3176       __ j(kEqual, &done);
3177       GenerateInvokeRuntime(
3178           GetThreadOffset<kX86_64PointerSize>(kQuickUpdateInlineCache).Int32Value());
3179       __ Bind(&done);
3180     } else {
3181       // This is unexpected, but we don't guarantee stable compilation across
3182       // JIT runs so just warn about it.
3183       ScopedObjectAccess soa(Thread::Current());
3184       LOG(WARNING) << "Missing inline cache for " << GetGraph()->GetArtMethod()->PrettyMethod();
3185     }
3186   }
3187 }
3188 
3189 void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
3190   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
3191   LocationSummary* locations = invoke->GetLocations();
3192   CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
3193   Location receiver = locations->InAt(0);
3194   size_t class_offset = mirror::Object::ClassOffset().SizeValue();
3195 
3196   if (receiver.IsStackSlot()) {
3197     __ movl(temp, Address(CpuRegister(RSP), receiver.GetStackIndex()));
3198     // /* HeapReference<Class> */ temp = temp->klass_
3199     __ movl(temp, Address(temp, class_offset));
3200   } else {
3201     // /* HeapReference<Class> */ temp = receiver->klass_
3202     __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset));
3203   }
3204   codegen_->MaybeRecordImplicitNullCheck(invoke);
3205   // Instead of simply (possibly) unpoisoning `temp` here, we should
3206   // emit a read barrier for the previous class reference load.
3207   // However this is not required in practice, as this is an
3208   // intermediate/temporary reference and because the current
3209   // concurrent copying collector keeps the from-space memory
3210   // intact/accessible until the end of the marking phase (the
3211   // concurrent copying collector may not in the future).
3212   __ MaybeUnpoisonHeapReference(temp);
3213 
3214   codegen_->MaybeGenerateInlineCacheCheck(invoke, temp);
3215 
3216   if (invoke->GetHiddenArgumentLoadKind() != MethodLoadKind::kRecursive &&
3217       invoke->GetHiddenArgumentLoadKind() != MethodLoadKind::kRuntimeCall) {
3218     Location hidden_reg = locations->GetTemp(1);
3219     // Set the hidden argument. It is safe to do this here, as RAX
3220     // won't be modified again before the `call` instruction.
3221     // We also do it after MaybeGenerateInlineCacheCheck, which may use RAX.
3222     DCHECK_EQ(RAX, hidden_reg.AsRegister<Register>());
3223     codegen_->LoadMethod(invoke->GetHiddenArgumentLoadKind(), hidden_reg, invoke);
3224   }
3225 
3226   // temp = temp->GetAddressOfIMT()
3227   __ movq(temp,
3228       Address(temp, mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
3229   // temp = temp->GetImtEntryAt(method_offset);
3230   uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
3231       invoke->GetImtIndex(), kX86_64PointerSize));
3233   __ movq(temp, Address(temp, method_offset));
3234   if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRuntimeCall) {
3235     // We pass the method from the IMT in case of a conflict. This will ensure
3236     // we go into the runtime to resolve the actual method.
3237     Location hidden_reg = locations->GetTemp(1);
3238     __ movq(hidden_reg.AsRegister<CpuRegister>(), temp);
3239   }
3240   // call temp->GetEntryPoint();
3241   __ call(Address(
3242       temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64PointerSize).SizeValue()));
3243 
3244   DCHECK(!codegen_->IsLeafMethod());
3245   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
3246 }
3247 
3248 void LocationsBuilderX86_64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
3249   IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
3250   if (intrinsic.TryDispatch(invoke)) {
3251     return;
3252   }
3253   HandleInvoke(invoke);
3254 }
3255 
3256 void InstructionCodeGeneratorX86_64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
3257   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
3258     return;
3259   }
3260   codegen_->GenerateInvokePolymorphicCall(invoke);
3261 }
3262 
3263 void LocationsBuilderX86_64::VisitInvokeCustom(HInvokeCustom* invoke) {
3264   HandleInvoke(invoke);
3265 }
3266 
3267 void InstructionCodeGeneratorX86_64::VisitInvokeCustom(HInvokeCustom* invoke) {
3268   codegen_->GenerateInvokeCustomCall(invoke);
3269 }
3270 
3271 void LocationsBuilderX86_64::VisitNeg(HNeg* neg) {
3272   LocationSummary* locations =
3273       new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
3274   switch (neg->GetResultType()) {
3275     case DataType::Type::kInt32:
3276     case DataType::Type::kInt64:
3277       locations->SetInAt(0, Location::RequiresRegister());
3278       locations->SetOut(Location::SameAsFirstInput());
3279       break;
3280 
3281     case DataType::Type::kFloat32:
3282     case DataType::Type::kFloat64:
3283       locations->SetInAt(0, Location::RequiresFpuRegister());
3284       locations->SetOut(Location::SameAsFirstInput());
3285       locations->AddTemp(Location::RequiresFpuRegister());
3286       break;
3287 
3288     default:
3289       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
3290   }
3291 }
3292 
3293 void InstructionCodeGeneratorX86_64::VisitNeg(HNeg* neg) {
3294   LocationSummary* locations = neg->GetLocations();
3295   Location out = locations->Out();
3296   Location in = locations->InAt(0);
3297   switch (neg->GetResultType()) {
3298     case DataType::Type::kInt32:
3299       DCHECK(in.IsRegister());
3300       DCHECK(in.Equals(out));
3301       __ negl(out.AsRegister<CpuRegister>());
3302       break;
3303 
3304     case DataType::Type::kInt64:
3305       DCHECK(in.IsRegister());
3306       DCHECK(in.Equals(out));
3307       __ negq(out.AsRegister<CpuRegister>());
3308       break;
3309 
3310     case DataType::Type::kFloat32: {
3311       DCHECK(in.Equals(out));
3312       XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
3313       // Implement float negation with an exclusive or with value
3314       // 0x80000000 (mask for bit 31, representing the sign of a
3315       // single-precision floating-point number).
3316       __ movss(mask, codegen_->LiteralInt32Address(0x80000000));
3317       __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
3318       break;
3319     }
3320 
3321     case DataType::Type::kFloat64: {
3322       DCHECK(in.Equals(out));
3323       XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
3324       // Implement double negation with an exclusive or with value
3325       // 0x8000000000000000 (mask for bit 63, representing the sign of
3326       // a double-precision floating-point number).
3327       __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000)));
3328       __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
3329       break;
3330     }
3331 
3332     default:
3333       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
3334   }
3335 }
3336 
3337 void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) {
3338   LocationSummary* locations =
3339       new (GetGraph()->GetAllocator()) LocationSummary(conversion, LocationSummary::kNoCall);
3340   DataType::Type result_type = conversion->GetResultType();
3341   DataType::Type input_type = conversion->GetInputType();
3342   DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
3343       << input_type << " -> " << result_type;
3344 
3345   switch (result_type) {
3346     case DataType::Type::kUint8:
3347     case DataType::Type::kInt8:
3348     case DataType::Type::kUint16:
3349     case DataType::Type::kInt16:
3350       DCHECK(DataType::IsIntegralType(input_type)) << input_type;
3351       locations->SetInAt(0, Location::Any());
3352       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3353       break;
3354 
3355     case DataType::Type::kInt32:
3356       switch (input_type) {
3357         case DataType::Type::kInt64:
3358           locations->SetInAt(0, Location::Any());
3359           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3360           break;
3361 
3362         case DataType::Type::kFloat32:
3363           locations->SetInAt(0, Location::RequiresFpuRegister());
3364           locations->SetOut(Location::RequiresRegister());
3365           break;
3366 
3367         case DataType::Type::kFloat64:
3368           locations->SetInAt(0, Location::RequiresFpuRegister());
3369           locations->SetOut(Location::RequiresRegister());
3370           break;
3371 
3372         default:
3373           LOG(FATAL) << "Unexpected type conversion from " << input_type
3374                      << " to " << result_type;
3375       }
3376       break;
3377 
3378     case DataType::Type::kInt64:
3379       switch (input_type) {
3380         case DataType::Type::kBool:
3381         case DataType::Type::kUint8:
3382         case DataType::Type::kInt8:
3383         case DataType::Type::kUint16:
3384         case DataType::Type::kInt16:
3385         case DataType::Type::kInt32:
3386           // TODO: We would benefit from a (to-be-implemented)
3387           // Location::RegisterOrStackSlot requirement for this input.
3388           locations->SetInAt(0, Location::RequiresRegister());
3389           locations->SetOut(Location::RequiresRegister());
3390           break;
3391 
3392         case DataType::Type::kFloat32:
3393           locations->SetInAt(0, Location::RequiresFpuRegister());
3394           locations->SetOut(Location::RequiresRegister());
3395           break;
3396 
3397         case DataType::Type::kFloat64:
3398           locations->SetInAt(0, Location::RequiresFpuRegister());
3399           locations->SetOut(Location::RequiresRegister());
3400           break;
3401 
3402         default:
3403           LOG(FATAL) << "Unexpected type conversion from " << input_type
3404                      << " to " << result_type;
3405       }
3406       break;
3407 
3408     case DataType::Type::kFloat32:
3409       switch (input_type) {
3410         case DataType::Type::kBool:
3411         case DataType::Type::kUint8:
3412         case DataType::Type::kInt8:
3413         case DataType::Type::kUint16:
3414         case DataType::Type::kInt16:
3415         case DataType::Type::kInt32:
3416           locations->SetInAt(0, Location::Any());
3417           locations->SetOut(Location::RequiresFpuRegister());
3418           break;
3419 
3420         case DataType::Type::kInt64:
3421           locations->SetInAt(0, Location::Any());
3422           locations->SetOut(Location::RequiresFpuRegister());
3423           break;
3424 
3425         case DataType::Type::kFloat64:
3426           locations->SetInAt(0, Location::Any());
3427           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3428           break;
3429 
3430         default:
3431           LOG(FATAL) << "Unexpected type conversion from " << input_type
3432                      << " to " << result_type;
3433       }
3434       break;
3435 
3436     case DataType::Type::kFloat64:
3437       switch (input_type) {
3438         case DataType::Type::kBool:
3439         case DataType::Type::kUint8:
3440         case DataType::Type::kInt8:
3441         case DataType::Type::kUint16:
3442         case DataType::Type::kInt16:
3443         case DataType::Type::kInt32:
3444           locations->SetInAt(0, Location::Any());
3445           locations->SetOut(Location::RequiresFpuRegister());
3446           break;
3447 
3448         case DataType::Type::kInt64:
3449           locations->SetInAt(0, Location::Any());
3450           locations->SetOut(Location::RequiresFpuRegister());
3451           break;
3452 
3453         case DataType::Type::kFloat32:
3454           locations->SetInAt(0, Location::Any());
3455           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3456           break;
3457 
3458         default:
3459           LOG(FATAL) << "Unexpected type conversion from " << input_type
3460                      << " to " << result_type;
3461       }
3462       break;
3463 
3464     default:
3465       LOG(FATAL) << "Unexpected type conversion from " << input_type
3466                  << " to " << result_type;
3467   }
3468 }
3469 
3470 void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conversion) {
3471   LocationSummary* locations = conversion->GetLocations();
3472   Location out = locations->Out();
3473   Location in = locations->InAt(0);
3474   DataType::Type result_type = conversion->GetResultType();
3475   DataType::Type input_type = conversion->GetInputType();
3476   DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
3477       << input_type << " -> " << result_type;
3478   switch (result_type) {
3479     case DataType::Type::kUint8:
3480       switch (input_type) {
3481         case DataType::Type::kInt8:
3482         case DataType::Type::kUint16:
3483         case DataType::Type::kInt16:
3484         case DataType::Type::kInt32:
3485         case DataType::Type::kInt64:
3486           if (in.IsRegister()) {
3487             __ movzxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3488           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
3489             __ movzxb(out.AsRegister<CpuRegister>(),
3490                       Address(CpuRegister(RSP), in.GetStackIndex()));
3491           } else {
3492             __ movl(out.AsRegister<CpuRegister>(),
3493                     Immediate(static_cast<uint8_t>(Int64FromConstant(in.GetConstant()))));
3494           }
3495           break;
3496 
3497         default:
3498           LOG(FATAL) << "Unexpected type conversion from " << input_type
3499                      << " to " << result_type;
3500       }
3501       break;
3502 
3503     case DataType::Type::kInt8:
3504       switch (input_type) {
3505         case DataType::Type::kUint8:
3506         case DataType::Type::kUint16:
3507         case DataType::Type::kInt16:
3508         case DataType::Type::kInt32:
3509         case DataType::Type::kInt64:
3510           if (in.IsRegister()) {
3511             __ movsxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3512           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
3513             __ movsxb(out.AsRegister<CpuRegister>(),
3514                       Address(CpuRegister(RSP), in.GetStackIndex()));
3515           } else {
3516             __ movl(out.AsRegister<CpuRegister>(),
3517                     Immediate(static_cast<int8_t>(Int64FromConstant(in.GetConstant()))));
3518           }
3519           break;
3520 
3521         default:
3522           LOG(FATAL) << "Unexpected type conversion from " << input_type
3523                      << " to " << result_type;
3524       }
3525       break;
3526 
3527     case DataType::Type::kUint16:
3528       switch (input_type) {
3529         case DataType::Type::kInt8:
3530         case DataType::Type::kInt16:
3531         case DataType::Type::kInt32:
3532         case DataType::Type::kInt64:
3533           if (in.IsRegister()) {
3534             __ movzxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3535           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
3536             __ movzxw(out.AsRegister<CpuRegister>(),
3537                       Address(CpuRegister(RSP), in.GetStackIndex()));
3538           } else {
3539             __ movl(out.AsRegister<CpuRegister>(),
3540                     Immediate(static_cast<uint16_t>(Int64FromConstant(in.GetConstant()))));
3541           }
3542           break;
3543 
3544         default:
3545           LOG(FATAL) << "Unexpected type conversion from " << input_type
3546                      << " to " << result_type;
3547       }
3548       break;
3549 
3550     case DataType::Type::kInt16:
3551       switch (input_type) {
3552         case DataType::Type::kUint16:
3553         case DataType::Type::kInt32:
3554         case DataType::Type::kInt64:
3555           if (in.IsRegister()) {
3556             __ movsxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3557           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
3558             __ movsxw(out.AsRegister<CpuRegister>(),
3559                       Address(CpuRegister(RSP), in.GetStackIndex()));
3560           } else {
3561             __ movl(out.AsRegister<CpuRegister>(),
3562                     Immediate(static_cast<int16_t>(Int64FromConstant(in.GetConstant()))));
3563           }
3564           break;
3565 
3566         default:
3567           LOG(FATAL) << "Unexpected type conversion from " << input_type
3568                      << " to " << result_type;
3569       }
3570       break;
3571 
3572     case DataType::Type::kInt32:
3573       switch (input_type) {
3574         case DataType::Type::kInt64:
3575           if (in.IsRegister()) {
3576             __ movl(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3577           } else if (in.IsDoubleStackSlot()) {
3578             __ movl(out.AsRegister<CpuRegister>(),
3579                     Address(CpuRegister(RSP), in.GetStackIndex()));
3580           } else {
3581             DCHECK(in.IsConstant());
3582             DCHECK(in.GetConstant()->IsLongConstant());
3583             int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3584             __ movl(out.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
3585           }
3586           break;
3587 
3588         case DataType::Type::kFloat32: {
3589           XmmRegister input = in.AsFpuRegister<XmmRegister>();
3590           CpuRegister output = out.AsRegister<CpuRegister>();
3591           NearLabel done, nan;
3592 
3593           __ movl(output, Immediate(kPrimIntMax));
3594           // if input >= (float)INT_MAX goto done
3595           __ comiss(input, codegen_->LiteralFloatAddress(static_cast<float>(kPrimIntMax)));
3596           __ j(kAboveEqual, &done);
3597           // if input == NaN goto nan
3598           __ j(kUnordered, &nan);
3599           // output = float-to-int-truncate(input)
3600           __ cvttss2si(output, input, false);
3601           __ jmp(&done);
3602           __ Bind(&nan);
3603           //  output = 0
3604           __ xorl(output, output);
3605           __ Bind(&done);
3606           break;
3607         }
3608 
3609         case DataType::Type::kFloat64: {
3610           XmmRegister input = in.AsFpuRegister<XmmRegister>();
3611           CpuRegister output = out.AsRegister<CpuRegister>();
3612           NearLabel done, nan;
3613 
3614           __ movl(output, Immediate(kPrimIntMax));
3615           // if input >= (double)INT_MAX goto done
3616           __ comisd(input, codegen_->LiteralDoubleAddress(kPrimIntMax));
3617           __ j(kAboveEqual, &done);
3618           // if input == NaN goto nan
3619           __ j(kUnordered, &nan);
3620           // output = double-to-int-truncate(input)
3621           __ cvttsd2si(output, input);
3622           __ jmp(&done);
3623           __ Bind(&nan);
3624           //  output = 0
3625           __ xorl(output, output);
3626           __ Bind(&done);
3627           break;
3628         }
3629 
3630         default:
3631           LOG(FATAL) << "Unexpected type conversion from " << input_type
3632                      << " to " << result_type;
3633       }
3634       break;
3635 
3636     case DataType::Type::kInt64:
3637       DCHECK(out.IsRegister());
3638       switch (input_type) {
3639         case DataType::Type::kBool:
3640         case DataType::Type::kUint8:
3641         case DataType::Type::kInt8:
3642         case DataType::Type::kUint16:
3643         case DataType::Type::kInt16:
3644         case DataType::Type::kInt32:
3645           DCHECK(in.IsRegister());
3646           __ movsxd(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3647           break;
3648 
3649         case DataType::Type::kFloat32: {
3650           XmmRegister input = in.AsFpuRegister<XmmRegister>();
3651           CpuRegister output = out.AsRegister<CpuRegister>();
3652           NearLabel done, nan;
3653 
3654           codegen_->Load64BitValue(output, kPrimLongMax);
3655           // if input >= (float)LONG_MAX goto done
3656           __ comiss(input, codegen_->LiteralFloatAddress(static_cast<float>(kPrimLongMax)));
3657           __ j(kAboveEqual, &done);
3658           // if input == NaN goto nan
3659           __ j(kUnordered, &nan);
3660           // output = float-to-long-truncate(input)
3661           __ cvttss2si(output, input, true);
3662           __ jmp(&done);
3663           __ Bind(&nan);
3664           //  output = 0
3665           __ xorl(output, output);
3666           __ Bind(&done);
3667           break;
3668         }
3669 
3670         case DataType::Type::kFloat64: {
3671           XmmRegister input = in.AsFpuRegister<XmmRegister>();
3672           CpuRegister output = out.AsRegister<CpuRegister>();
3673           NearLabel done, nan;
3674 
3675           codegen_->Load64BitValue(output, kPrimLongMax);
3676           // if input >= (double)LONG_MAX goto done
3677           __ comisd(input, codegen_->LiteralDoubleAddress(
3678                 static_cast<double>(kPrimLongMax)));
3679           __ j(kAboveEqual, &done);
3680           // if input == NaN goto nan
3681           __ j(kUnordered, &nan);
3682           // output = double-to-long-truncate(input)
3683           __ cvttsd2si(output, input, true);
3684           __ jmp(&done);
3685           __ Bind(&nan);
3686           //  output = 0
3687           __ xorl(output, output);
3688           __ Bind(&done);
3689           break;
3690         }
3691 
3692         default:
3693           LOG(FATAL) << "Unexpected type conversion from " << input_type
3694                      << " to " << result_type;
3695       }
3696       break;
3697 
3698     case DataType::Type::kFloat32:
3699       switch (input_type) {
3700         case DataType::Type::kBool:
3701         case DataType::Type::kUint8:
3702         case DataType::Type::kInt8:
3703         case DataType::Type::kUint16:
3704         case DataType::Type::kInt16:
3705         case DataType::Type::kInt32:
3706           if (in.IsRegister()) {
3707             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
3708           } else if (in.IsConstant()) {
3709             int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
3710             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3711             codegen_->Load32BitValue(dest, static_cast<float>(v));
3712           } else {
3713             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
3714                         Address(CpuRegister(RSP), in.GetStackIndex()), false);
3715           }
3716           break;
3717 
3718         case DataType::Type::kInt64:
3719           if (in.IsRegister()) {
3720             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
3721           } else if (in.IsConstant()) {
3722             int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
3723             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3724             codegen_->Load32BitValue(dest, static_cast<float>(v));
3725           } else {
3726             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
3727                         Address(CpuRegister(RSP), in.GetStackIndex()), true);
3728           }
3729           break;
3730 
3731         case DataType::Type::kFloat64:
3732           if (in.IsFpuRegister()) {
3733             __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3734           } else if (in.IsConstant()) {
3735             double v = in.GetConstant()->AsDoubleConstant()->GetValue();
3736             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3737             codegen_->Load32BitValue(dest, static_cast<float>(v));
3738           } else {
3739             __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(),
3740                         Address(CpuRegister(RSP), in.GetStackIndex()));
3741           }
3742           break;
3743 
3744         default:
3745           LOG(FATAL) << "Unexpected type conversion from " << input_type
3746                      << " to " << result_type;
3747       }
3748       break;
3749 
3750     case DataType::Type::kFloat64:
3751       switch (input_type) {
3752         case DataType::Type::kBool:
3753         case DataType::Type::kUint8:
3754         case DataType::Type::kInt8:
3755         case DataType::Type::kUint16:
3756         case DataType::Type::kInt16:
3757         case DataType::Type::kInt32:
3758           if (in.IsRegister()) {
3759             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
3760           } else if (in.IsConstant()) {
3761             int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
3762             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3763             codegen_->Load64BitValue(dest, static_cast<double>(v));
3764           } else {
3765             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
3766                         Address(CpuRegister(RSP), in.GetStackIndex()), false);
3767           }
3768           break;
3769 
3770         case DataType::Type::kInt64:
3771           if (in.IsRegister()) {
3772             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
3773           } else if (in.IsConstant()) {
3774             int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
3775             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3776             codegen_->Load64BitValue(dest, static_cast<double>(v));
3777           } else {
3778             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
3779                         Address(CpuRegister(RSP), in.GetStackIndex()), true);
3780           }
3781           break;
3782 
3783         case DataType::Type::kFloat32:
3784           if (in.IsFpuRegister()) {
3785             __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3786           } else if (in.IsConstant()) {
3787             float v = in.GetConstant()->AsFloatConstant()->GetValue();
3788             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3789             codegen_->Load64BitValue(dest, static_cast<double>(v));
3790           } else {
3791             __ cvtss2sd(out.AsFpuRegister<XmmRegister>(),
3792                         Address(CpuRegister(RSP), in.GetStackIndex()));
3793           }
3794           break;
3795 
3796         default:
3797           LOG(FATAL) << "Unexpected type conversion from " << input_type
3798                      << " to " << result_type;
3799       }
3800       break;
3801 
3802     default:
3803       LOG(FATAL) << "Unexpected type conversion from " << input_type
3804                  << " to " << result_type;
3805   }
3806 }
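// Illustrative sketch (not emitted code): the float/double -> integer conversions above
// implement Java narrowing semantics without a runtime call. For kFloat32 -> kInt32 the
// generated sequence behaves roughly like:
//
//   int32_t Convert(float in) {
//     if (in >= static_cast<float>(INT32_MAX)) return INT32_MAX;  // comiss + j(kAboveEqual)
//     if (in != in)                            return 0;          // NaN: j(kUnordered) + xorl
//     return static_cast<int32_t>(in);                            // cvttss2si (truncation)
//   }
//
// Inputs below INT32_MIN are handled by cvttss2si itself, which produces the integer
// indefinite value 0x80000000 == INT32_MIN, matching Java's clamping. The kInt64 cases
// follow the same pattern with kPrimLongMax and the 64-bit cvttss2si/cvttsd2si forms.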
3807 
3808 void LocationsBuilderX86_64::VisitAdd(HAdd* add) {
3809   LocationSummary* locations =
3810       new (GetGraph()->GetAllocator()) LocationSummary(add, LocationSummary::kNoCall);
3811   switch (add->GetResultType()) {
3812     case DataType::Type::kInt32: {
3813       locations->SetInAt(0, Location::RequiresRegister());
3814       locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
3815       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3816       break;
3817     }
3818 
3819     case DataType::Type::kInt64: {
3820       locations->SetInAt(0, Location::RequiresRegister());
3821       // We can use a leaq or addq if the constant can fit in an immediate.
3822       locations->SetInAt(1, Location::RegisterOrInt32Constant(add->InputAt(1)));
3823       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3824       break;
3825     }
3826 
3827     case DataType::Type::kFloat64:
3828     case DataType::Type::kFloat32: {
3829       locations->SetInAt(0, Location::RequiresFpuRegister());
3830       locations->SetInAt(1, Location::Any());
3831       locations->SetOut(Location::SameAsFirstInput());
3832       break;
3833     }
3834 
3835     default:
3836       LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3837   }
3838 }
3839 
3840 void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) {
3841   LocationSummary* locations = add->GetLocations();
3842   Location first = locations->InAt(0);
3843   Location second = locations->InAt(1);
3844   Location out = locations->Out();
3845 
3846   switch (add->GetResultType()) {
3847     case DataType::Type::kInt32: {
3848       if (second.IsRegister()) {
3849         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3850           __ addl(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3851         } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3852           __ addl(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
3853         } else {
3854           __ leal(out.AsRegister<CpuRegister>(), Address(
3855               first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
3856         }
3857       } else if (second.IsConstant()) {
3858         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3859           __ addl(out.AsRegister<CpuRegister>(),
3860                   Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
3861         } else {
3862           __ leal(out.AsRegister<CpuRegister>(), Address(
3863               first.AsRegister<CpuRegister>(), second.GetConstant()->AsIntConstant()->GetValue()));
3864         }
3865       } else {
3866         DCHECK(first.Equals(locations->Out()));
3867         __ addl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
3868       }
3869       break;
3870     }
3871 
3872     case DataType::Type::kInt64: {
3873       if (second.IsRegister()) {
3874         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3875           __ addq(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3876         } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3877           __ addq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
3878         } else {
3879           __ leaq(out.AsRegister<CpuRegister>(), Address(
3880               first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
3881         }
3882       } else {
3883         DCHECK(second.IsConstant());
3884         int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3885         int32_t int32_value = Low32Bits(value);
3886         DCHECK_EQ(int32_value, value);
3887         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3888           __ addq(out.AsRegister<CpuRegister>(), Immediate(int32_value));
3889         } else {
3890           __ leaq(out.AsRegister<CpuRegister>(), Address(
3891               first.AsRegister<CpuRegister>(), int32_value));
3892         }
3893       }
3894       break;
3895     }
3896 
3897     case DataType::Type::kFloat32: {
3898       if (second.IsFpuRegister()) {
3899         __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3900       } else if (second.IsConstant()) {
3901         __ addss(first.AsFpuRegister<XmmRegister>(),
3902                  codegen_->LiteralFloatAddress(
3903                      second.GetConstant()->AsFloatConstant()->GetValue()));
3904       } else {
3905         DCHECK(second.IsStackSlot());
3906         __ addss(first.AsFpuRegister<XmmRegister>(),
3907                  Address(CpuRegister(RSP), second.GetStackIndex()));
3908       }
3909       break;
3910     }
3911 
3912     case DataType::Type::kFloat64: {
3913       if (second.IsFpuRegister()) {
3914         __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3915       } else if (second.IsConstant()) {
3916         __ addsd(first.AsFpuRegister<XmmRegister>(),
3917                  codegen_->LiteralDoubleAddress(
3918                      second.GetConstant()->AsDoubleConstant()->GetValue()));
3919       } else {
3920         DCHECK(second.IsDoubleStackSlot());
3921         __ addsd(first.AsFpuRegister<XmmRegister>(),
3922                  Address(CpuRegister(RSP), second.GetStackIndex()));
3923       }
3924       break;
3925     }
3926 
3927     default:
3928       LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3929   }
3930 }
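// Illustrative note: when the output register differs from both inputs, the integer cases
// above use LEA as a non-destructive three-operand add, e.g.
//
//   leal out, [first + second*1 + 0]   // out = first + second, flags untouched
//   leal out, [first + imm]            // out = first + constant
//
// which is why the kInt32/kInt64 additions can be allocated with kNoOutputOverlap.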
3931 
3932 void LocationsBuilderX86_64::VisitSub(HSub* sub) {
3933   LocationSummary* locations =
3934       new (GetGraph()->GetAllocator()) LocationSummary(sub, LocationSummary::kNoCall);
3935   switch (sub->GetResultType()) {
3936     case DataType::Type::kInt32: {
3937       locations->SetInAt(0, Location::RequiresRegister());
3938       locations->SetInAt(1, Location::Any());
3939       locations->SetOut(Location::SameAsFirstInput());
3940       break;
3941     }
3942     case DataType::Type::kInt64: {
3943       locations->SetInAt(0, Location::RequiresRegister());
3944       locations->SetInAt(1, Location::RegisterOrInt32Constant(sub->InputAt(1)));
3945       locations->SetOut(Location::SameAsFirstInput());
3946       break;
3947     }
3948     case DataType::Type::kFloat32:
3949     case DataType::Type::kFloat64: {
3950       locations->SetInAt(0, Location::RequiresFpuRegister());
3951       locations->SetInAt(1, Location::Any());
3952       locations->SetOut(Location::SameAsFirstInput());
3953       break;
3954     }
3955     default:
3956       LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3957   }
3958 }
3959 
3960 void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) {
3961   LocationSummary* locations = sub->GetLocations();
3962   Location first = locations->InAt(0);
3963   Location second = locations->InAt(1);
3964   DCHECK(first.Equals(locations->Out()));
3965   switch (sub->GetResultType()) {
3966     case DataType::Type::kInt32: {
3967       if (second.IsRegister()) {
3968         __ subl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3969       } else if (second.IsConstant()) {
3970         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
3971         __ subl(first.AsRegister<CpuRegister>(), imm);
3972       } else {
3973         __ subl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
3974       }
3975       break;
3976     }
3977     case DataType::Type::kInt64: {
3978       if (second.IsConstant()) {
3979         int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3980         DCHECK(IsInt<32>(value));
3981         __ subq(first.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
3982       } else {
3983         __ subq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3984       }
3985       break;
3986     }
3987 
3988     case DataType::Type::kFloat32: {
3989       if (second.IsFpuRegister()) {
3990         __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3991       } else if (second.IsConstant()) {
3992         __ subss(first.AsFpuRegister<XmmRegister>(),
3993                  codegen_->LiteralFloatAddress(
3994                      second.GetConstant()->AsFloatConstant()->GetValue()));
3995       } else {
3996         DCHECK(second.IsStackSlot());
3997         __ subss(first.AsFpuRegister<XmmRegister>(),
3998                  Address(CpuRegister(RSP), second.GetStackIndex()));
3999       }
4000       break;
4001     }
4002 
4003     case DataType::Type::kFloat64: {
4004       if (second.IsFpuRegister()) {
4005         __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4006       } else if (second.IsConstant()) {
4007         __ subsd(first.AsFpuRegister<XmmRegister>(),
4008                  codegen_->LiteralDoubleAddress(
4009                      second.GetConstant()->AsDoubleConstant()->GetValue()));
4010       } else {
4011         DCHECK(second.IsDoubleStackSlot());
4012         __ subsd(first.AsFpuRegister<XmmRegister>(),
4013                  Address(CpuRegister(RSP), second.GetStackIndex()));
4014       }
4015       break;
4016     }
4017 
4018     default:
4019       LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
4020   }
4021 }
4022 
4023 void LocationsBuilderX86_64::VisitMul(HMul* mul) {
4024   LocationSummary* locations =
4025       new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
4026   switch (mul->GetResultType()) {
4027     case DataType::Type::kInt32: {
4028       locations->SetInAt(0, Location::RequiresRegister());
4029       locations->SetInAt(1, Location::Any());
4030       if (mul->InputAt(1)->IsIntConstant()) {
4031         // Can use 3 operand multiply.
4032         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4033       } else {
4034         locations->SetOut(Location::SameAsFirstInput());
4035       }
4036       break;
4037     }
4038     case DataType::Type::kInt64: {
4039       locations->SetInAt(0, Location::RequiresRegister());
4040       locations->SetInAt(1, Location::Any());
4041       if (mul->InputAt(1)->IsLongConstant() &&
4042           IsInt<32>(mul->InputAt(1)->AsLongConstant()->GetValue())) {
4043         // Can use 3 operand multiply.
4044         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4045       } else {
4046         locations->SetOut(Location::SameAsFirstInput());
4047       }
4048       break;
4049     }
4050     case DataType::Type::kFloat32:
4051     case DataType::Type::kFloat64: {
4052       locations->SetInAt(0, Location::RequiresFpuRegister());
4053       locations->SetInAt(1, Location::Any());
4054       locations->SetOut(Location::SameAsFirstInput());
4055       break;
4056     }
4057 
4058     default:
4059       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
4060   }
4061 }
4062 
4063 void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) {
4064   LocationSummary* locations = mul->GetLocations();
4065   Location first = locations->InAt(0);
4066   Location second = locations->InAt(1);
4067   Location out = locations->Out();
4068   switch (mul->GetResultType()) {
4069     case DataType::Type::kInt32:
4070       // The constant may have ended up in a register, so test explicitly to avoid
4071       // problems where the output may not be the same as the first operand.
4072       if (mul->InputAt(1)->IsIntConstant()) {
4073         Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
4074         __ imull(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), imm);
4075       } else if (second.IsRegister()) {
4076         DCHECK(first.Equals(out));
4077         __ imull(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
4078       } else {
4079         DCHECK(first.Equals(out));
4080         DCHECK(second.IsStackSlot());
4081         __ imull(first.AsRegister<CpuRegister>(),
4082                  Address(CpuRegister(RSP), second.GetStackIndex()));
4083       }
4084       break;
4085     case DataType::Type::kInt64: {
4086       // The constant may have ended up in a register, so test explicitly to avoid
4087       // problems where the output may not be the same as the first operand.
4088       if (mul->InputAt(1)->IsLongConstant()) {
4089         int64_t value = mul->InputAt(1)->AsLongConstant()->GetValue();
4090         if (IsInt<32>(value)) {
4091           __ imulq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(),
4092                    Immediate(static_cast<int32_t>(value)));
4093         } else {
4094           // Have to use the constant area.
4095           DCHECK(first.Equals(out));
4096           __ imulq(first.AsRegister<CpuRegister>(), codegen_->LiteralInt64Address(value));
4097         }
4098       } else if (second.IsRegister()) {
4099         DCHECK(first.Equals(out));
4100         __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
4101       } else {
4102         DCHECK(second.IsDoubleStackSlot());
4103         DCHECK(first.Equals(out));
4104         __ imulq(first.AsRegister<CpuRegister>(),
4105                  Address(CpuRegister(RSP), second.GetStackIndex()));
4106       }
4107       break;
4108     }
4109 
4110     case DataType::Type::kFloat32: {
4111       DCHECK(first.Equals(out));
4112       if (second.IsFpuRegister()) {
4113         __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4114       } else if (second.IsConstant()) {
4115         __ mulss(first.AsFpuRegister<XmmRegister>(),
4116                  codegen_->LiteralFloatAddress(
4117                      second.GetConstant()->AsFloatConstant()->GetValue()));
4118       } else {
4119         DCHECK(second.IsStackSlot());
4120         __ mulss(first.AsFpuRegister<XmmRegister>(),
4121                  Address(CpuRegister(RSP), second.GetStackIndex()));
4122       }
4123       break;
4124     }
4125 
4126     case DataType::Type::kFloat64: {
4127       DCHECK(first.Equals(out));
4128       if (second.IsFpuRegister()) {
4129         __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4130       } else if (second.IsConstant()) {
4131         __ mulsd(first.AsFpuRegister<XmmRegister>(),
4132                  codegen_->LiteralDoubleAddress(
4133                      second.GetConstant()->AsDoubleConstant()->GetValue()));
4134       } else {
4135         DCHECK(second.IsDoubleStackSlot());
4136         __ mulsd(first.AsFpuRegister<XmmRegister>(),
4137                  Address(CpuRegister(RSP), second.GetStackIndex()));
4138       }
4139       break;
4140     }
4141 
4142     default:
4143       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
4144   }
4145 }
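// Illustrative note: x86-64 has no IMUL form that multiplies by a 64-bit immediate, so the
// kInt64 case above distinguishes three shapes:
//
//   imulq out, first, imm32          // constant fits in 32 bits (3-operand form)
//   imulq first, [constant area]     // 64-bit constant loaded from memory
//   imulq first, second              // plain register-register (2-operand form)
//
// Only the 3-operand immediate form can write a distinct output register, which matches the
// kNoOutputOverlap decision made in the locations builder above.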
4146 
4147 void InstructionCodeGeneratorX86_64::PushOntoFPStack(Location source, uint32_t temp_offset,
4148                                                      uint32_t stack_adjustment, bool is_float) {
4149   if (source.IsStackSlot()) {
4150     DCHECK(is_float);
4151     __ flds(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
4152   } else if (source.IsDoubleStackSlot()) {
4153     DCHECK(!is_float);
4154     __ fldl(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
4155   } else {
4156     // Write the value to the temporary location on the stack and load to FP stack.
4157     if (is_float) {
4158       Location stack_temp = Location::StackSlot(temp_offset);
4159       codegen_->Move(stack_temp, source);
4160       __ flds(Address(CpuRegister(RSP), temp_offset));
4161     } else {
4162       Location stack_temp = Location::DoubleStackSlot(temp_offset);
4163       codegen_->Move(stack_temp, source);
4164       __ fldl(Address(CpuRegister(RSP), temp_offset));
4165     }
4166   }
4167 }
4168 
4169 void InstructionCodeGeneratorX86_64::GenerateRemFP(HRem* rem) {
4170   DataType::Type type = rem->GetResultType();
4171   bool is_float = type == DataType::Type::kFloat32;
4172   size_t elem_size = DataType::Size(type);
4173   LocationSummary* locations = rem->GetLocations();
4174   Location first = locations->InAt(0);
4175   Location second = locations->InAt(1);
4176   Location out = locations->Out();
4177 
4178   // Create stack space for 2 elements.
4179   // TODO: enhance register allocator to ask for stack temporaries.
4180   __ subq(CpuRegister(RSP), Immediate(2 * elem_size));
4181 
4182   // Load the values to the FP stack in reverse order, using temporaries if needed.
4183   PushOntoFPStack(second, elem_size, 2 * elem_size, is_float);
4184   PushOntoFPStack(first, 0, 2 * elem_size, is_float);
4185 
4186   // Loop doing FPREM until we stabilize.
4187   NearLabel retry;
4188   __ Bind(&retry);
4189   __ fprem();
4190 
4191   // Move FP status to AX.
4192   __ fstsw();
4193 
4194   // And see if the argument reduction is complete. This is signaled by the
4195   // C2 FPU flag bit set to 0.
4196   __ andl(CpuRegister(RAX), Immediate(kC2ConditionMask));
4197   __ j(kNotEqual, &retry);
4198 
4199   // We have settled on the final value. Retrieve it into an XMM register.
4200   // Store FP top of stack to real stack.
4201   if (is_float) {
4202     __ fsts(Address(CpuRegister(RSP), 0));
4203   } else {
4204     __ fstl(Address(CpuRegister(RSP), 0));
4205   }
4206 
4207   // Pop the 2 items from the FP stack.
4208   __ fucompp();
4209 
4210   // Load the value from the stack into an XMM register.
4211   DCHECK(out.IsFpuRegister()) << out;
4212   if (is_float) {
4213     __ movss(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
4214   } else {
4215     __ movsd(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
4216   }
4217 
4218   // And remove the temporary stack space we allocated.
4219   __ addq(CpuRegister(RSP), Immediate(2 * elem_size));
4220 }
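// Illustrative note: FPREM computes a *partial* remainder and sets the C2 status flag while
// the reduction is still incomplete, hence the retry loop above (fstsw copies the FPU status
// word into AX so C2 can be tested with kC2ConditionMask). The result keeps the sign of the
// dividend, matching Java's % on floating-point values, e.g. 5.1f % 3.0f == 2.1f and
// -5.1f % 3.0f == -2.1f (approximately, up to rounding).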
4221 
4222 void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
4223   DCHECK(instruction->IsDiv() || instruction->IsRem());
4224 
4225   LocationSummary* locations = instruction->GetLocations();
4226   Location second = locations->InAt(1);
4227   DCHECK(second.IsConstant());
4228 
4229   CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
4230   CpuRegister input_register = locations->InAt(0).AsRegister<CpuRegister>();
4231   int64_t imm = Int64FromConstant(second.GetConstant());
4232 
4233   DCHECK(imm == 1 || imm == -1);
4234 
4235   switch (instruction->GetResultType()) {
4236     case DataType::Type::kInt32: {
4237       if (instruction->IsRem()) {
4238         __ xorl(output_register, output_register);
4239       } else {
4240         __ movl(output_register, input_register);
4241         if (imm == -1) {
4242           __ negl(output_register);
4243         }
4244       }
4245       break;
4246     }
4247 
4248     case DataType::Type::kInt64: {
4249       if (instruction->IsRem()) {
4250         __ xorl(output_register, output_register);
4251       } else {
4252         __ movq(output_register, input_register);
4253         if (imm == -1) {
4254           __ negq(output_register);
4255         }
4256       }
4257       break;
4258     }
4259 
4260     default:
4261       LOG(FATAL) << "Unexpected type for div by (-)1 " << instruction->GetResultType();
4262   }
4263 }
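
// Illustrative note: for a constant divisor of 1 or -1 no division is needed:
// x % 1 == x % -1 == 0 (hence the xor), x / 1 == x, and x / -1 == -x. The negl/negq of
// INT_MIN / LONG_MIN wraps back to the same value, which is exactly the overflow behavior
// Java specifies for this case.
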
4264 void InstructionCodeGeneratorX86_64::RemByPowerOfTwo(HRem* instruction) {
4265   LocationSummary* locations = instruction->GetLocations();
4266   Location second = locations->InAt(1);
4267   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4268   CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
4269   int64_t imm = Int64FromConstant(second.GetConstant());
4270   DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
4271   uint64_t abs_imm = AbsOrMin(imm);
4272   CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
4273   if (instruction->GetResultType() == DataType::Type::kInt32) {
4274     NearLabel done;
4275     __ movl(out, numerator);
4276     __ andl(out, Immediate(abs_imm - 1));
4277     __ j(Condition::kZero, &done);
4278     __ leal(tmp, Address(out, static_cast<int32_t>(~(abs_imm - 1))));
4279     __ testl(numerator, numerator);
4280     __ cmov(Condition::kLess, out, tmp, false);
4281     __ Bind(&done);
4282 
4283   } else {
4284     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
4285     codegen_->Load64BitValue(tmp, abs_imm - 1);
4286     NearLabel done;
4287 
4288     __ movq(out, numerator);
4289     __ andq(out, tmp);
4290     __ j(Condition::kZero, &done);
4291     __ movq(tmp, numerator);
4292     __ sarq(tmp, Immediate(63));
4293     __ shlq(tmp, Immediate(WhichPowerOf2(abs_imm)));
4294     __ orq(out, tmp);
4295     __ Bind(&done);
4296   }
4297 }
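
// Illustrative note: for a remainder by a power of two 2^k the code above masks the low k
// bits and then fixes up negative numerators so the result keeps the sign of the dividend
// (Java semantics). For example, -10 % 8:
//   low bits: -10 & 7 == 6   (non-zero, numerator negative)
//   fix-up:   6 - 8   == -2  (32-bit: leal tmp, [out + ~(8-1)] then cmov;
//                             64-bit: or-in the sign bits shifted left by k)
// which matches Java's -10 % 8 == -2.
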
4298 void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
4299   LocationSummary* locations = instruction->GetLocations();
4300   Location second = locations->InAt(1);
4301 
4302   CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
4303   CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
4304 
4305   int64_t imm = Int64FromConstant(second.GetConstant());
4306   DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
4307   uint64_t abs_imm = AbsOrMin(imm);
4308 
4309   CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
4310 
4311   if (instruction->GetResultType() == DataType::Type::kInt32) {
4312     // When the denominator is equal to 2, we can simply add the sign bit of the
4313     // numerator to it; the addl below is one cycle cheaper than the test/cmov pair.
4314     if (abs_imm == 2) {
4315       __ leal(tmp, Address(numerator, 0));
4316       __ shrl(tmp, Immediate(31));
4317       __ addl(tmp, numerator);
4318     } else {
4319       __ leal(tmp, Address(numerator, abs_imm - 1));
4320       __ testl(numerator, numerator);
4321       __ cmov(kGreaterEqual, tmp, numerator);
4322     }
4323     int shift = CTZ(imm);
4324     __ sarl(tmp, Immediate(shift));
4325 
4326     if (imm < 0) {
4327       __ negl(tmp);
4328     }
4329 
4330     __ movl(output_register, tmp);
4331   } else {
4332     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
4333     CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
4334     if (abs_imm == 2) {
4335       __ movq(rdx, numerator);
4336       __ shrq(rdx, Immediate(63));
4337       __ addq(rdx, numerator);
4338     } else {
4339       codegen_->Load64BitValue(rdx, abs_imm - 1);
4340       __ addq(rdx, numerator);
4341       __ testq(numerator, numerator);
4342       __ cmov(kGreaterEqual, rdx, numerator);
4343     }
4344     int shift = CTZ(imm);
4345     __ sarq(rdx, Immediate(shift));
4346 
4347     if (imm < 0) {
4348       __ negq(rdx);
4349     }
4350 
4351     __ movq(output_register, rdx);
4352   }
4353 }
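// Illustrative note: an arithmetic shift alone rounds toward negative infinity, but Java's
// integer division rounds toward zero, so the code above biases negative numerators by
// (2^k - 1) before shifting. For example, -7 / 4:
//   unbiased: -7 >> 2        == -2   (wrong)
//   biased:   (-7 + 3) >> 2  == -1   (matches Java)
// For abs_imm == 2 the bias is just the sign bit, which is why a shrl/shrq by 31/63 plus an
// add replaces the test/cmov pair.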
4354 
4355 void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
4356   DCHECK(instruction->IsDiv() || instruction->IsRem());
4357 
4358   LocationSummary* locations = instruction->GetLocations();
4359   Location second = locations->InAt(1);
4360 
4361   CpuRegister numerator = instruction->IsDiv() ? locations->GetTemp(1).AsRegister<CpuRegister>()
4362       : locations->GetTemp(0).AsRegister<CpuRegister>();
4363   CpuRegister eax = locations->InAt(0).AsRegister<CpuRegister>();
4364   CpuRegister edx = instruction->IsDiv() ? locations->GetTemp(0).AsRegister<CpuRegister>()
4365       : locations->Out().AsRegister<CpuRegister>();
4366   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4367 
4368   DCHECK_EQ(RAX, eax.AsRegister());
4369   DCHECK_EQ(RDX, edx.AsRegister());
4370   if (instruction->IsDiv()) {
4371     DCHECK_EQ(RAX, out.AsRegister());
4372   } else {
4373     DCHECK_EQ(RDX, out.AsRegister());
4374   }
4375 
4376   int64_t magic;
4377   int shift;
4378 
4379   // TODO: can these branches be written as one?
4380   if (instruction->GetResultType() == DataType::Type::kInt32) {
4381     int imm = second.GetConstant()->AsIntConstant()->GetValue();
4382 
4383     CalculateMagicAndShiftForDivRem(imm, false /* is_long= */, &magic, &shift);
4384 
4385     __ movl(numerator, eax);
4386 
4387     __ movl(eax, Immediate(magic));
4388     __ imull(numerator);
4389 
4390     if (imm > 0 && magic < 0) {
4391       __ addl(edx, numerator);
4392     } else if (imm < 0 && magic > 0) {
4393       __ subl(edx, numerator);
4394     }
4395 
4396     if (shift != 0) {
4397       __ sarl(edx, Immediate(shift));
4398     }
4399 
4400     __ movl(eax, edx);
4401     __ shrl(edx, Immediate(31));
4402     __ addl(edx, eax);
4403 
4404     if (instruction->IsRem()) {
4405       __ movl(eax, numerator);
4406       __ imull(edx, Immediate(imm));
4407       __ subl(eax, edx);
4408       __ movl(edx, eax);
4409     } else {
4410       __ movl(eax, edx);
4411     }
4412   } else {
4413     int64_t imm = second.GetConstant()->AsLongConstant()->GetValue();
4414 
4415     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
4416 
4417     CpuRegister rax = eax;
4418     CpuRegister rdx = edx;
4419 
4420     CalculateMagicAndShiftForDivRem(imm, true /* is_long= */, &magic, &shift);
4421 
4422     // Save the numerator.
4423     __ movq(numerator, rax);
4424 
4425     // RAX = magic
4426     codegen_->Load64BitValue(rax, magic);
4427 
4428     // RDX:RAX = magic * numerator
4429     __ imulq(numerator);
4430 
4431     if (imm > 0 && magic < 0) {
4432       // RDX += numerator
4433       __ addq(rdx, numerator);
4434     } else if (imm < 0 && magic > 0) {
4435       // RDX -= numerator
4436       __ subq(rdx, numerator);
4437     }
4438 
4439     // Shift if needed.
4440     if (shift != 0) {
4441       __ sarq(rdx, Immediate(shift));
4442     }
4443 
4444     // RDX += 1 if RDX < 0
4445     __ movq(rax, rdx);
4446     __ shrq(rdx, Immediate(63));
4447     __ addq(rdx, rax);
4448 
4449     if (instruction->IsRem()) {
4450       __ movq(rax, numerator);
4451 
4452       if (IsInt<32>(imm)) {
4453         __ imulq(rdx, Immediate(static_cast<int32_t>(imm)));
4454       } else {
4455         __ imulq(rdx, codegen_->LiteralInt64Address(imm));
4456       }
4457 
4458       __ subq(rax, rdx);
4459       __ movq(rdx, rax);
4460     } else {
4461       __ movq(rax, rdx);
4462     }
4463   }
4464 }
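// Illustrative note: the "magic number" scheme above (see CalculateMagicAndShiftForDivRem)
// replaces division by an arbitrary constant with a widening multiply, an optional
// correction, a shift and a sign fix-up. Sketch for a 32-bit division by 3, where
// magic == 0x55555556 and shift == 0:
//   q  = high32(n * 0x55555556)   // one-operand imull leaves the high half in EDX
//   q += (n < 0 ? 1 : 0)          // shrl edx, 31 + addl: round toward zero
// so 7 / 3 -> high32(7 * 0x55555556) == 2, and -7 / 3 -> -3 + 1 == -2, both matching Java.
// For a remainder, q is multiplied back by the divisor and subtracted from n.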
4465 
4466 void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
4467   DCHECK(instruction->IsDiv() || instruction->IsRem());
4468   DataType::Type type = instruction->GetResultType();
4469   DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
4470 
4471   bool is_div = instruction->IsDiv();
4472   LocationSummary* locations = instruction->GetLocations();
4473 
4474   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4475   Location second = locations->InAt(1);
4476 
4477   DCHECK_EQ(RAX, locations->InAt(0).AsRegister<CpuRegister>().AsRegister());
4478   DCHECK_EQ(is_div ? RAX : RDX, out.AsRegister());
4479 
4480   if (second.IsConstant()) {
4481     int64_t imm = Int64FromConstant(second.GetConstant());
4482 
4483     if (imm == 0) {
4484       // Do not generate anything. DivZeroCheck would prevent any code to be executed.
4485     } else if (imm == 1 || imm == -1) {
4486       DivRemOneOrMinusOne(instruction);
4487     } else if (IsPowerOfTwo(AbsOrMin(imm))) {
4488       if (is_div) {
4489         DivByPowerOfTwo(instruction->AsDiv());
4490       } else {
4491         RemByPowerOfTwo(instruction->AsRem());
4492       }
4493     } else {
4494       DCHECK(imm <= -2 || imm >= 2);
4495       GenerateDivRemWithAnyConstant(instruction);
4496     }
4497   } else {
4498     SlowPathCode* slow_path =
4499         new (codegen_->GetScopedAllocator()) DivRemMinusOneSlowPathX86_64(
4500             instruction, out.AsRegister(), type, is_div);
4501     codegen_->AddSlowPath(slow_path);
4502 
4503     CpuRegister second_reg = second.AsRegister<CpuRegister>();
4504     // 0x80000000(00000000)/-1 triggers an arithmetic exception!
4505     // Dividing by -1 is actually negation and -0x80000000(00000000) = 0x80000000(00000000)
4506     // so it's safe to just use negl instead of more complex comparisons.
4507     if (type == DataType::Type::kInt32) {
4508       __ cmpl(second_reg, Immediate(-1));
4509       __ j(kEqual, slow_path->GetEntryLabel());
4510       // edx:eax <- sign-extended of eax
4511       __ cdq();
4512       // eax = quotient, edx = remainder
4513       __ idivl(second_reg);
4514     } else {
4515       __ cmpq(second_reg, Immediate(-1));
4516       __ j(kEqual, slow_path->GetEntryLabel());
4517       // rdx:rax <- sign-extended of rax
4518       __ cqo();
4519       // rax = quotient, rdx = remainder
4520       __ idivq(second_reg);
4521     }
4522     __ Bind(slow_path->GetExitLabel());
4523   }
4524 }
4525 
4526 void LocationsBuilderX86_64::VisitDiv(HDiv* div) {
4527   LocationSummary* locations =
4528       new (GetGraph()->GetAllocator()) LocationSummary(div, LocationSummary::kNoCall);
4529   switch (div->GetResultType()) {
4530     case DataType::Type::kInt32:
4531     case DataType::Type::kInt64: {
4532       locations->SetInAt(0, Location::RegisterLocation(RAX));
4533       locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
4534       locations->SetOut(Location::SameAsFirstInput());
4535       // Intel uses edx:eax as the dividend.
4536       locations->AddTemp(Location::RegisterLocation(RDX));
4537       // We need to save the numerator while we tweak rax and rdx. As we are using imul in a way
4538       // which enforces results to be in RAX and RDX, things are simpler if we use RDX also as
4539       // output and request another temp.
4540       if (div->InputAt(1)->IsConstant()) {
4541         locations->AddTemp(Location::RequiresRegister());
4542       }
4543       break;
4544     }
4545 
4546     case DataType::Type::kFloat32:
4547     case DataType::Type::kFloat64: {
4548       locations->SetInAt(0, Location::RequiresFpuRegister());
4549       locations->SetInAt(1, Location::Any());
4550       locations->SetOut(Location::SameAsFirstInput());
4551       break;
4552     }
4553 
4554     default:
4555       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4556   }
4557 }
4558 
4559 void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) {
4560   LocationSummary* locations = div->GetLocations();
4561   Location first = locations->InAt(0);
4562   Location second = locations->InAt(1);
4563   DCHECK(first.Equals(locations->Out()));
4564 
4565   DataType::Type type = div->GetResultType();
4566   switch (type) {
4567     case DataType::Type::kInt32:
4568     case DataType::Type::kInt64: {
4569       GenerateDivRemIntegral(div);
4570       break;
4571     }
4572 
4573     case DataType::Type::kFloat32: {
4574       if (second.IsFpuRegister()) {
4575         __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4576       } else if (second.IsConstant()) {
4577         __ divss(first.AsFpuRegister<XmmRegister>(),
4578                  codegen_->LiteralFloatAddress(
4579                      second.GetConstant()->AsFloatConstant()->GetValue()));
4580       } else {
4581         DCHECK(second.IsStackSlot());
4582         __ divss(first.AsFpuRegister<XmmRegister>(),
4583                  Address(CpuRegister(RSP), second.GetStackIndex()));
4584       }
4585       break;
4586     }
4587 
4588     case DataType::Type::kFloat64: {
4589       if (second.IsFpuRegister()) {
4590         __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4591       } else if (second.IsConstant()) {
4592         __ divsd(first.AsFpuRegister<XmmRegister>(),
4593                  codegen_->LiteralDoubleAddress(
4594                      second.GetConstant()->AsDoubleConstant()->GetValue()));
4595       } else {
4596         DCHECK(second.IsDoubleStackSlot());
4597         __ divsd(first.AsFpuRegister<XmmRegister>(),
4598                  Address(CpuRegister(RSP), second.GetStackIndex()));
4599       }
4600       break;
4601     }
4602 
4603     default:
4604       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4605   }
4606 }
4607 
4608 void LocationsBuilderX86_64::VisitRem(HRem* rem) {
4609   DataType::Type type = rem->GetResultType();
4610   LocationSummary* locations =
4611     new (GetGraph()->GetAllocator()) LocationSummary(rem, LocationSummary::kNoCall);
4612 
4613   switch (type) {
4614     case DataType::Type::kInt32:
4615     case DataType::Type::kInt64: {
4616       locations->SetInAt(0, Location::RegisterLocation(RAX));
4617       locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
4618       // Intel uses rdx:rax as the dividend and puts the remainder in rdx
4619       locations->SetOut(Location::RegisterLocation(RDX));
4620       // We need to save the numerator while we tweak eax and edx. As we are using imul in a way
4621       // which enforces results to be in RAX and RDX, things are simpler if we use EAX also as
4622       // output and request another temp.
4623       if (rem->InputAt(1)->IsConstant()) {
4624         locations->AddTemp(Location::RequiresRegister());
4625       }
4626       break;
4627     }
4628 
4629     case DataType::Type::kFloat32:
4630     case DataType::Type::kFloat64: {
4631       locations->SetInAt(0, Location::Any());
4632       locations->SetInAt(1, Location::Any());
4633       locations->SetOut(Location::RequiresFpuRegister());
4634       locations->AddTemp(Location::RegisterLocation(RAX));
4635       break;
4636     }
4637 
4638     default:
4639       LOG(FATAL) << "Unexpected rem type " << type;
4640   }
4641 }
4642 
4643 void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) {
4644   DataType::Type type = rem->GetResultType();
4645   switch (type) {
4646     case DataType::Type::kInt32:
4647     case DataType::Type::kInt64: {
4648       GenerateDivRemIntegral(rem);
4649       break;
4650     }
4651     case DataType::Type::kFloat32:
4652     case DataType::Type::kFloat64: {
4653       GenerateRemFP(rem);
4654       break;
4655     }
4656     default:
4657       LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
4658   }
4659 }
4660 
4661 static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
4662   LocationSummary* locations = new (allocator) LocationSummary(minmax);
4663   switch (minmax->GetResultType()) {
4664     case DataType::Type::kInt32:
4665     case DataType::Type::kInt64:
4666       locations->SetInAt(0, Location::RequiresRegister());
4667       locations->SetInAt(1, Location::RequiresRegister());
4668       locations->SetOut(Location::SameAsFirstInput());
4669       break;
4670     case DataType::Type::kFloat32:
4671     case DataType::Type::kFloat64:
4672       locations->SetInAt(0, Location::RequiresFpuRegister());
4673       locations->SetInAt(1, Location::RequiresFpuRegister());
4674       // The following is sub-optimal, but it is all we can do for now. It would also be
4675       // fine to accept the second input as the output (we could simply swap the inputs).
4676       locations->SetOut(Location::SameAsFirstInput());
4677       break;
4678     default:
4679       LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
4680   }
4681 }
4682 
4683 void InstructionCodeGeneratorX86_64::GenerateMinMaxInt(LocationSummary* locations,
4684                                                        bool is_min,
4685                                                        DataType::Type type) {
4686   Location op1_loc = locations->InAt(0);
4687   Location op2_loc = locations->InAt(1);
4688 
4689   // Shortcut for same input locations.
4690   if (op1_loc.Equals(op2_loc)) {
4691     // Can return immediately, as op1_loc == out_loc.
4692     // Note: if we ever support separate registers, e.g., output into memory, we need to check for
4693     //       a copy here.
4694     DCHECK(locations->Out().Equals(op1_loc));
4695     return;
4696   }
4697 
4698   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4699   CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();
4700 
4701   //  (out := op1)
4702   //  out <=? op2
4703   //  if out is min jmp done
4704   //  out := op2
4705   // done:
4706 
4707   if (type == DataType::Type::kInt64) {
4708     __ cmpq(out, op2);
4709     __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ true);
4710   } else {
4711     DCHECK_EQ(type, DataType::Type::kInt32);
4712     __ cmpl(out, op2);
4713     __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ false);
4714   }
4715 }
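// Illustrative note: after cmp out, op2 the cmov above overwrites out with op2 only when out
// is *not* already the answer: kGreater for min (out > op2) and kLess for max (out < op2).
// Signed conditions are used, so e.g. min(-1, 1) == -1 rather than treating -1 as 0xFFFFFFFF.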
4716 
4717 void InstructionCodeGeneratorX86_64::GenerateMinMaxFP(LocationSummary* locations,
4718                                                       bool is_min,
4719                                                       DataType::Type type) {
4720   Location op1_loc = locations->InAt(0);
4721   Location op2_loc = locations->InAt(1);
4722   Location out_loc = locations->Out();
4723   XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
4724 
4725   // Shortcut for same input locations.
4726   if (op1_loc.Equals(op2_loc)) {
4727     DCHECK(out_loc.Equals(op1_loc));
4728     return;
4729   }
4730 
4731   //  (out := op1)
4732   //  out <=? op2
4733   //  if Nan jmp Nan_label
4734   //  if out is min jmp done
4735   //  if op2 is min jmp op2_label
4736   //  handle -0/+0
4737   //  jmp done
4738   // Nan_label:
4739   //  out := NaN
4740   // op2_label:
4741   //  out := op2
4742   // done:
4743   //
4744   // This removes one jmp, but needs to copy one input (op1) to out.
4745   //
4746   // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?
4747 
4748   XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
4749 
4750   NearLabel nan, done, op2_label;
4751   if (type == DataType::Type::kFloat64) {
4752     __ ucomisd(out, op2);
4753   } else {
4754     DCHECK_EQ(type, DataType::Type::kFloat32);
4755     __ ucomiss(out, op2);
4756   }
4757 
4758   __ j(Condition::kParityEven, &nan);
4759 
4760   __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
4761   __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
4762 
4763   // Handle 0.0/-0.0.
4764   if (is_min) {
4765     if (type == DataType::Type::kFloat64) {
4766       __ orpd(out, op2);
4767     } else {
4768       __ orps(out, op2);
4769     }
4770   } else {
4771     if (type == DataType::Type::kFloat64) {
4772       __ andpd(out, op2);
4773     } else {
4774       __ andps(out, op2);
4775     }
4776   }
4777   __ jmp(&done);
4778 
4779   // NaN handling.
4780   __ Bind(&nan);
4781   if (type == DataType::Type::kFloat64) {
4782     __ movsd(out, codegen_->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
4783   } else {
4784     __ movss(out, codegen_->LiteralInt32Address(INT32_C(0x7FC00000)));
4785   }
4786   __ jmp(&done);
4787 
4788   // out := op2;
4789   __ Bind(&op2_label);
4790   if (type == DataType::Type::kFloat64) {
4791     __ movsd(out, op2);
4792   } else {
4793     __ movss(out, op2);
4794   }
4795 
4796   // Done.
4797   __ Bind(&done);
4798 }
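// Illustrative note: the bitwise or/and above exists solely for signed zeros, which compare
// equal under ucomiss/ucomisd but must still be distinguished:
//   min(+0.0f, -0.0f): 0x00000000 | 0x80000000 == 0x80000000  -> -0.0f
//   max(+0.0f, -0.0f): 0x00000000 & 0x80000000 == 0x00000000  -> +0.0f
// Any NaN operand takes the kParityEven branch and produces the canonical quiet NaN
// (0x7FC00000 / 0x7FF8000000000000), as Math.min/max require.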
4799 
4800 void InstructionCodeGeneratorX86_64::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
4801   DataType::Type type = minmax->GetResultType();
4802   switch (type) {
4803     case DataType::Type::kInt32:
4804     case DataType::Type::kInt64:
4805       GenerateMinMaxInt(minmax->GetLocations(), is_min, type);
4806       break;
4807     case DataType::Type::kFloat32:
4808     case DataType::Type::kFloat64:
4809       GenerateMinMaxFP(minmax->GetLocations(), is_min, type);
4810       break;
4811     default:
4812       LOG(FATAL) << "Unexpected type for HMinMax " << type;
4813   }
4814 }
4815 
4816 void LocationsBuilderX86_64::VisitMin(HMin* min) {
4817   CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
4818 }
4819 
4820 void InstructionCodeGeneratorX86_64::VisitMin(HMin* min) {
4821   GenerateMinMax(min, /*is_min*/ true);
4822 }
4823 
4824 void LocationsBuilderX86_64::VisitMax(HMax* max) {
4825   CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
4826 }
4827 
4828 void InstructionCodeGeneratorX86_64::VisitMax(HMax* max) {
4829   GenerateMinMax(max, /*is_min*/ false);
4830 }
4831 
4832 void LocationsBuilderX86_64::VisitAbs(HAbs* abs) {
4833   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
4834   switch (abs->GetResultType()) {
4835     case DataType::Type::kInt32:
4836     case DataType::Type::kInt64:
4837       locations->SetInAt(0, Location::RequiresRegister());
4838       locations->SetOut(Location::SameAsFirstInput());
4839       locations->AddTemp(Location::RequiresRegister());
4840       break;
4841     case DataType::Type::kFloat32:
4842     case DataType::Type::kFloat64:
4843       locations->SetInAt(0, Location::RequiresFpuRegister());
4844       locations->SetOut(Location::SameAsFirstInput());
4845       locations->AddTemp(Location::RequiresFpuRegister());
4846       break;
4847     default:
4848       LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4849   }
4850 }
4851 
4852 void InstructionCodeGeneratorX86_64::VisitAbs(HAbs* abs) {
4853   LocationSummary* locations = abs->GetLocations();
4854   switch (abs->GetResultType()) {
4855     case DataType::Type::kInt32: {
4856       CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4857       CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();
4858       // Create mask.
4859       __ movl(mask, out);
4860       __ sarl(mask, Immediate(31));
4861       // Add mask.
4862       __ addl(out, mask);
4863       __ xorl(out, mask);
4864       break;
4865     }
4866     case DataType::Type::kInt64: {
4867       CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4868       CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();
4869       // Create mask.
4870       __ movq(mask, out);
4871       __ sarq(mask, Immediate(63));
4872       // Add mask.
4873       __ addq(out, mask);
4874       __ xorq(out, mask);
4875       break;
4876     }
4877     case DataType::Type::kFloat32: {
4878       XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4879       XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4880       __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
4881       __ andps(out, mask);
4882       break;
4883     }
4884     case DataType::Type::kFloat64: {
4885       XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4886       XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4887       __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
4888       __ andpd(out, mask);
4889       break;
4890     }
4891     default:
4892       LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4893   }
4894 }
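// Illustrative note: the integer cases above use the branch-free identity
// abs(x) == (x + (x >> 31)) ^ (x >> 31) (shift by 63 for kInt64). For example, x == -5:
//   mask == -1, (-5 + -1) ^ -1 == -6 ^ -1 == 5.
// INT_MIN maps to itself, which is what Math.abs specifies. The floating-point cases simply
// clear the sign bit by and-ing with 0x7FFFFFFF / 0x7FFFFFFFFFFFFFFF, so abs(-0.0) == +0.0
// and NaNs pass through.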
4895 
4896 void LocationsBuilderX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4897   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
4898   locations->SetInAt(0, Location::Any());
4899 }
4900 
4901 void InstructionCodeGeneratorX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4902   SlowPathCode* slow_path =
4903       new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathX86_64(instruction);
4904   codegen_->AddSlowPath(slow_path);
4905 
4906   LocationSummary* locations = instruction->GetLocations();
4907   Location value = locations->InAt(0);
4908 
4909   switch (instruction->GetType()) {
4910     case DataType::Type::kBool:
4911     case DataType::Type::kUint8:
4912     case DataType::Type::kInt8:
4913     case DataType::Type::kUint16:
4914     case DataType::Type::kInt16:
4915     case DataType::Type::kInt32: {
4916       if (value.IsRegister()) {
4917         __ testl(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
4918         __ j(kEqual, slow_path->GetEntryLabel());
4919       } else if (value.IsStackSlot()) {
4920         __ cmpl(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
4921         __ j(kEqual, slow_path->GetEntryLabel());
4922       } else {
4923         DCHECK(value.IsConstant()) << value;
4924         if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
4925           __ jmp(slow_path->GetEntryLabel());
4926         }
4927       }
4928       break;
4929     }
4930     case DataType::Type::kInt64: {
4931       if (value.IsRegister()) {
4932         __ testq(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
4933         __ j(kEqual, slow_path->GetEntryLabel());
4934       } else if (value.IsDoubleStackSlot()) {
4935         __ cmpq(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
4936         __ j(kEqual, slow_path->GetEntryLabel());
4937       } else {
4938         DCHECK(value.IsConstant()) << value;
4939         if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
4940           __ jmp(slow_path->GetEntryLabel());
4941         }
4942       }
4943       break;
4944     }
4945     default:
4946       LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
4947   }
4948 }
4949 
4950 void LocationsBuilderX86_64::HandleShift(HBinaryOperation* op) {
4951   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4952 
4953   LocationSummary* locations =
4954       new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall);
4955 
4956   switch (op->GetResultType()) {
4957     case DataType::Type::kInt32:
4958     case DataType::Type::kInt64: {
4959       locations->SetInAt(0, Location::RequiresRegister());
4960       // The shift count needs to be in CL.
4961       locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, op->InputAt(1)));
4962       locations->SetOut(Location::SameAsFirstInput());
4963       break;
4964     }
4965     default:
4966       LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
4967   }
4968 }
4969 
4970 void InstructionCodeGeneratorX86_64::HandleShift(HBinaryOperation* op) {
4971   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4972 
4973   LocationSummary* locations = op->GetLocations();
4974   CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
4975   Location second = locations->InAt(1);
4976 
4977   switch (op->GetResultType()) {
4978     case DataType::Type::kInt32: {
4979       if (second.IsRegister()) {
4980         CpuRegister second_reg = second.AsRegister<CpuRegister>();
4981         if (op->IsShl()) {
4982           __ shll(first_reg, second_reg);
4983         } else if (op->IsShr()) {
4984           __ sarl(first_reg, second_reg);
4985         } else {
4986           __ shrl(first_reg, second_reg);
4987         }
4988       } else {
4989         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
4990         if (op->IsShl()) {
4991           __ shll(first_reg, imm);
4992         } else if (op->IsShr()) {
4993           __ sarl(first_reg, imm);
4994         } else {
4995           __ shrl(first_reg, imm);
4996         }
4997       }
4998       break;
4999     }
5000     case DataType::Type::kInt64: {
5001       if (second.IsRegister()) {
5002         CpuRegister second_reg = second.AsRegister<CpuRegister>();
5003         if (op->IsShl()) {
5004           __ shlq(first_reg, second_reg);
5005         } else if (op->IsShr()) {
5006           __ sarq(first_reg, second_reg);
5007         } else {
5008           __ shrq(first_reg, second_reg);
5009         }
5010       } else {
5011         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
5012         if (op->IsShl()) {
5013           __ shlq(first_reg, imm);
5014         } else if (op->IsShr()) {
5015           __ sarq(first_reg, imm);
5016         } else {
5017           __ shrq(first_reg, imm);
5018         }
5019       }
5020       break;
5021     }
5022     default:
5023       LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
5024       UNREACHABLE();
5025   }
5026 }
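// As a rough illustration: constant shift counts are masked to the operand width, matching Java
// shift semantics. For an Int32 `x << 35`, the immediate becomes 35 & kMaxIntShiftDistance == 3,
// so the generator emits approximately:
//
//   shll reg, 3   // reg is both the first input and the output (SameAsFirstInput)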
5027 
5028 void LocationsBuilderX86_64::VisitRor(HRor* ror) {
5029   LocationSummary* locations =
5030       new (GetGraph()->GetAllocator()) LocationSummary(ror, LocationSummary::kNoCall);
5031 
5032   switch (ror->GetResultType()) {
5033     case DataType::Type::kInt32:
5034     case DataType::Type::kInt64: {
5035       locations->SetInAt(0, Location::RequiresRegister());
5036       // The shift count needs to be in CL (unless it is a constant).
5037       locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, ror->InputAt(1)));
5038       locations->SetOut(Location::SameAsFirstInput());
5039       break;
5040     }
5041     default:
5042       LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
5043       UNREACHABLE();
5044   }
5045 }
5046 
5047 void InstructionCodeGeneratorX86_64::VisitRor(HRor* ror) {
5048   LocationSummary* locations = ror->GetLocations();
5049   CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
5050   Location second = locations->InAt(1);
5051 
5052   switch (ror->GetResultType()) {
5053     case DataType::Type::kInt32:
5054       if (second.IsRegister()) {
5055         CpuRegister second_reg = second.AsRegister<CpuRegister>();
5056         __ rorl(first_reg, second_reg);
5057       } else {
5058         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
5059         __ rorl(first_reg, imm);
5060       }
5061       break;
5062     case DataType::Type::kInt64:
5063       if (second.IsRegister()) {
5064         CpuRegister second_reg = second.AsRegister<CpuRegister>();
5065         __ rorq(first_reg, second_reg);
5066       } else {
5067         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
5068         __ rorq(first_reg, imm);
5069       }
5070       break;
5071     default:
5072       LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
5073       UNREACHABLE();
5074   }
5075 }
5076 
5077 void LocationsBuilderX86_64::VisitShl(HShl* shl) {
5078   HandleShift(shl);
5079 }
5080 
5081 void InstructionCodeGeneratorX86_64::VisitShl(HShl* shl) {
5082   HandleShift(shl);
5083 }
5084 
5085 void LocationsBuilderX86_64::VisitShr(HShr* shr) {
5086   HandleShift(shr);
5087 }
5088 
5089 void InstructionCodeGeneratorX86_64::VisitShr(HShr* shr) {
5090   HandleShift(shr);
5091 }
5092 
5093 void LocationsBuilderX86_64::VisitUShr(HUShr* ushr) {
5094   HandleShift(ushr);
5095 }
5096 
5097 void InstructionCodeGeneratorX86_64::VisitUShr(HUShr* ushr) {
5098   HandleShift(ushr);
5099 }
5100 
5101 void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
5102   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5103       instruction, LocationSummary::kCallOnMainOnly);
5104   InvokeRuntimeCallingConvention calling_convention;
5105   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
5106   locations->SetOut(Location::RegisterLocation(RAX));
5107 }
5108 
5109 void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) {
5110   codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
5111   CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
5112   DCHECK(!codegen_->IsLeafMethod());
5113 }
5114 
5115 void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) {
5116   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5117       instruction, LocationSummary::kCallOnMainOnly);
5118   InvokeRuntimeCallingConvention calling_convention;
5119   locations->SetOut(Location::RegisterLocation(RAX));
5120   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
5121   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
5122 }
5123 
5124 void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) {
5125   // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
5126   QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
5127   codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
5128   CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
5129   DCHECK(!codegen_->IsLeafMethod());
5130 }
5131 
5132 void LocationsBuilderX86_64::VisitParameterValue(HParameterValue* instruction) {
5133   LocationSummary* locations =
5134       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5135   Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
5136   if (location.IsStackSlot()) {
5137     location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5138   } else if (location.IsDoubleStackSlot()) {
5139     location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5140   }
5141   locations->SetOut(location);
5142 }
5143 
5144 void InstructionCodeGeneratorX86_64::VisitParameterValue(
5145     [[maybe_unused]] HParameterValue* instruction) {
5146   // Nothing to do, the parameter is already at its location.
5147 }
5148 
5149 void LocationsBuilderX86_64::VisitCurrentMethod(HCurrentMethod* instruction) {
5150   LocationSummary* locations =
5151       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5152   locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument));
5153 }
5154 
5155 void InstructionCodeGeneratorX86_64::VisitCurrentMethod(
5156     [[maybe_unused]] HCurrentMethod* instruction) {
5157   // Nothing to do, the method is already at its location.
5158 }
5159 
5160 void LocationsBuilderX86_64::VisitClassTableGet(HClassTableGet* instruction) {
5161   LocationSummary* locations =
5162       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5163   locations->SetInAt(0, Location::RequiresRegister());
5164   locations->SetOut(Location::RequiresRegister());
5165 }
5166 
5167 void InstructionCodeGeneratorX86_64::VisitClassTableGet(HClassTableGet* instruction) {
5168   LocationSummary* locations = instruction->GetLocations();
5169   if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
5170     uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
5171         instruction->GetIndex(), kX86_64PointerSize).SizeValue();
5172     __ movq(locations->Out().AsRegister<CpuRegister>(),
5173             Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset));
5174   } else {
5175     uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
5176         instruction->GetIndex(), kX86_64PointerSize));
5177     __ movq(locations->Out().AsRegister<CpuRegister>(),
5178             Address(locations->InAt(0).AsRegister<CpuRegister>(),
5179             mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
5180     __ movq(locations->Out().AsRegister<CpuRegister>(),
5181             Address(locations->Out().AsRegister<CpuRegister>(), method_offset));
5182   }
5183 }
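// Sketch of the two cases above: a vtable lookup is a single load of the ArtMethod* embedded in
// the class object, while an IMT lookup first loads the ImTable pointer from the class and then
// loads the method slot from that table (two dependent movq instructions).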
5184 
5185 void LocationsBuilderX86_64::VisitNot(HNot* not_) {
5186   LocationSummary* locations =
5187       new (GetGraph()->GetAllocator()) LocationSummary(not_, LocationSummary::kNoCall);
5188   locations->SetInAt(0, Location::RequiresRegister());
5189   locations->SetOut(Location::SameAsFirstInput());
5190 }
5191 
5192 void InstructionCodeGeneratorX86_64::VisitNot(HNot* not_) {
5193   LocationSummary* locations = not_->GetLocations();
5194   DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
5195             locations->Out().AsRegister<CpuRegister>().AsRegister());
5196   Location out = locations->Out();
5197   switch (not_->GetResultType()) {
5198     case DataType::Type::kInt32:
5199       __ notl(out.AsRegister<CpuRegister>());
5200       break;
5201 
5202     case DataType::Type::kInt64:
5203       __ notq(out.AsRegister<CpuRegister>());
5204       break;
5205 
5206     default:
5207       LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
5208   }
5209 }
5210 
5211 void LocationsBuilderX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
5212   LocationSummary* locations =
5213       new (GetGraph()->GetAllocator()) LocationSummary(bool_not, LocationSummary::kNoCall);
5214   locations->SetInAt(0, Location::RequiresRegister());
5215   locations->SetOut(Location::SameAsFirstInput());
5216 }
5217 
5218 void InstructionCodeGeneratorX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
5219   LocationSummary* locations = bool_not->GetLocations();
5220   DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
5221             locations->Out().AsRegister<CpuRegister>().AsRegister());
5222   Location out = locations->Out();
5223   __ xorl(out.AsRegister<CpuRegister>(), Immediate(1));
5224 }
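// A small note on the xorl above: HBooleanNot inputs are canonical booleans (0 or 1), so
// `out = out ^ 1` maps 0 -> 1 and 1 -> 0 without any comparison or branch.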
5225 
5226 void LocationsBuilderX86_64::VisitPhi(HPhi* instruction) {
5227   LocationSummary* locations =
5228       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5229   for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
5230     locations->SetInAt(i, Location::Any());
5231   }
5232   locations->SetOut(Location::Any());
5233 }
5234 
5235 void InstructionCodeGeneratorX86_64::VisitPhi([[maybe_unused]] HPhi* instruction) {
5236   LOG(FATAL) << "Unimplemented";
5237 }
5238 
5239 void CodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) {
5240   /*
5241    * According to the JSR-133 Cookbook, for x86-64 only StoreLoad/AnyAny barriers need memory fence.
5242    * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86-64 memory model.
5243    * For those cases, all we need to ensure is that there is a scheduling barrier in place.
5244    */
5245   switch (kind) {
5246     case MemBarrierKind::kAnyAny: {
5247       MemoryFence();
5248       break;
5249     }
5250     case MemBarrierKind::kAnyStore:
5251     case MemBarrierKind::kLoadAny:
5252     case MemBarrierKind::kStoreStore: {
5253       // nop
5254       break;
5255     }
5256     case MemBarrierKind::kNTStoreStore:
5257       // Non-Temporal Store/Store needs an explicit fence.
5258       MemoryFence(/* non-temporal= */ true);
5259       break;
5260   }
5261 }
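// As a rough illustration of how these barriers are used (see HandleFieldGet/HandleFieldSet):
// a volatile store is surrounded by a kAnyStore barrier (no instruction on x86-64) and a
// trailing kAnyAny barrier, so it lowers to approximately
//
//   mov [field], value            // single store instruction
//   <full fence>                  // MemoryFence(): mfence or an equivalent locked RMW,
//                                 // whichever MemoryFence() selects for the target CPU
//
// while a volatile load only needs a trailing kLoadAny barrier, which is also a no-op here.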
5262 
5263 void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) {
5264   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
5265 
5266   bool object_field_get_with_read_barrier =
5267       (instruction->GetType() == DataType::Type::kReference) && codegen_->EmitReadBarrier();
5268   LocationSummary* locations =
5269       new (GetGraph()->GetAllocator()) LocationSummary(instruction,
5270                                                        object_field_get_with_read_barrier
5271                                                            ? LocationSummary::kCallOnSlowPath
5272                                                            : LocationSummary::kNoCall);
5273   if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
5274     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
5275   }
5276   // receiver_input
5277   locations->SetInAt(0, Location::RequiresRegister());
5278   if (DataType::IsFloatingPointType(instruction->GetType())) {
5279     locations->SetOut(Location::RequiresFpuRegister());
5280   } else {
5281     // The output overlaps for an object field get when read barriers are
5282     // enabled: we do not want the move to overwrite the object's location, as
5283       // we need it to emit the read barrier. For predicated instructions the output can
5284       // always overlap, since it is SameAsFirstInput and already holds the default value.
5285     locations->SetOut(
5286         Location::RequiresRegister(),
5287         object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
5288   }
5289 }
5290 
5291 void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
5292                                                     const FieldInfo& field_info) {
5293   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
5294 
5295   LocationSummary* locations = instruction->GetLocations();
5296   Location base_loc = locations->InAt(0);
5297   CpuRegister base = base_loc.AsRegister<CpuRegister>();
5298   Location out = locations->Out();
5299   bool is_volatile = field_info.IsVolatile();
5300   DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
5301   DataType::Type load_type = instruction->GetType();
5302   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
5303 
5304   if (load_type == DataType::Type::kReference) {
5305     // /* HeapReference<Object> */ out = *(base + offset)
5306     if (codegen_->EmitBakerReadBarrier()) {
5307       // Note that a potential implicit null check is handled in this
5308       // CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call.
5309       codegen_->GenerateFieldLoadWithBakerReadBarrier(
5310           instruction, out, base, offset, /* needs_null_check= */ true);
5311       if (is_volatile) {
5312         codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5313       }
5314     } else {
5315       __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
5316       codegen_->MaybeRecordImplicitNullCheck(instruction);
5317       if (is_volatile) {
5318         codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5319       }
5320       // If read barriers are enabled, emit read barriers other than
5321       // Baker's using a slow path (and also unpoison the loaded
5322       // reference, if heap poisoning is enabled).
5323       codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
5324     }
5325   } else {
5326     codegen_->LoadFromMemoryNoReference(load_type, out, Address(base, offset));
5327     codegen_->MaybeRecordImplicitNullCheck(instruction);
5328     if (is_volatile) {
5329       codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5330     }
5331   }
5332 }
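// Sketch of the simplest case above (a non-reference, non-volatile Int32 field):
//
//   movl out, [base + offset]     // LoadFromMemoryNoReference
//
// The load itself doubles as the implicit null check recorded right after it.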
5333 
5334 void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction,
5335                                             const FieldInfo& field_info,
5336                                             WriteBarrierKind write_barrier_kind) {
5337   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
5338 
5339   LocationSummary* locations =
5340       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5341   DataType::Type field_type = field_info.GetFieldType();
5342   bool is_volatile = field_info.IsVolatile();
5343   bool needs_write_barrier =
5344       codegen_->StoreNeedsWriteBarrier(field_type, instruction->InputAt(1), write_barrier_kind);
5345   bool check_gc_card =
5346       codegen_->ShouldCheckGCCard(field_type, instruction->InputAt(1), write_barrier_kind);
5347 
5348   locations->SetInAt(0, Location::RequiresRegister());
5349   if (DataType::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
5350     if (is_volatile) {
5351       // In order to satisfy the semantics of volatile, this must be a single instruction store.
5352       locations->SetInAt(1, Location::FpuRegisterOrInt32Constant(instruction->InputAt(1)));
5353     } else {
5354       locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
5355     }
5356   } else {
5357     if (is_volatile) {
5358       // In order to satisfy the semantics of volatile, this must be a single instruction store.
5359       locations->SetInAt(1, Location::RegisterOrInt32Constant(instruction->InputAt(1)));
5360     } else {
5361       locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5362     }
5363   }
5364 
5365   // TODO(solanes): We could reduce the temp usage but it requires some non-trivial refactoring of
5366   // InstructionCodeGeneratorX86_64::HandleFieldSet, GenerateVarHandleSet due to `extra_temp_index`.
5367   if (needs_write_barrier ||
5368       check_gc_card ||
5369       (kPoisonHeapReferences && field_type == DataType::Type::kReference)) {
5370     // Temporary registers for the write barrier.
5371     locations->AddTemp(Location::RequiresRegister());
5372     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
5373   }
5374 }
5375 
5376 void InstructionCodeGeneratorX86_64::Bswap(Location value,
5377                                            DataType::Type type,
5378                                            CpuRegister* temp) {
5379   switch (type) {
5380     case DataType::Type::kInt16:
5381       // This should sign-extend, even if reimplemented with an XCHG of 8-bit registers.
5382       __ bswapl(value.AsRegister<CpuRegister>());
5383       __ sarl(value.AsRegister<CpuRegister>(), Immediate(16));
5384       break;
5385     case DataType::Type::kUint16:
5386       // TODO: Can be done with an XCHG of 8-bit registers. This is straight from Quick.
5387       __ bswapl(value.AsRegister<CpuRegister>());
5388       __ shrl(value.AsRegister<CpuRegister>(), Immediate(16));
5389       break;
5390     case DataType::Type::kInt32:
5391     case DataType::Type::kUint32:
5392       __ bswapl(value.AsRegister<CpuRegister>());
5393       break;
5394     case DataType::Type::kInt64:
5395     case DataType::Type::kUint64:
5396       __ bswapq(value.AsRegister<CpuRegister>());
5397       break;
5398     case DataType::Type::kFloat32: {
5399       DCHECK_NE(temp, nullptr);
5400       __ movd(*temp, value.AsFpuRegister<XmmRegister>(), /*is64bit=*/ false);
5401       __ bswapl(*temp);
5402       __ movd(value.AsFpuRegister<XmmRegister>(), *temp, /*is64bit=*/ false);
5403       break;
5404     }
5405     case DataType::Type::kFloat64: {
5406       DCHECK_NE(temp, nullptr);
5407       __ movd(*temp, value.AsFpuRegister<XmmRegister>(), /*is64bit=*/ true);
5408       __ bswapq(*temp);
5409       __ movd(value.AsFpuRegister<XmmRegister>(), *temp, /*is64bit=*/ true);
5410       break;
5411     }
5412     default:
5413       LOG(FATAL) << "Unexpected type for reverse-bytes: " << type;
5414       UNREACHABLE();
5415   }
5416 }
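// Worked example for the kInt16 case (a rough illustration): a register holding the
// sign-extended value 0x00001280 becomes 0x80120000 after bswapl, and the arithmetic shift
// right by 16 then yields 0xFFFF8012, i.e. the byte-reversed 16-bit value 0x8012 correctly
// sign-extended to 32 bits.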
5417 
5418 void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
5419                                                     uint32_t value_index,
5420                                                     uint32_t extra_temp_index,
5421                                                     DataType::Type field_type,
5422                                                     Address field_addr,
5423                                                     CpuRegister base,
5424                                                     bool is_volatile,
5425                                                     bool is_atomic,
5426                                                     bool value_can_be_null,
5427                                                     bool byte_swap,
5428                                                     WriteBarrierKind write_barrier_kind) {
5429   LocationSummary* locations = instruction->GetLocations();
5430   Location value = locations->InAt(value_index);
5431 
5432   if (is_volatile) {
5433     codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
5434   }
5435 
5436   bool maybe_record_implicit_null_check_done = false;
5437 
5438   if (value.IsConstant()) {
5439     switch (field_type) {
5440       case DataType::Type::kBool:
5441       case DataType::Type::kUint8:
5442       case DataType::Type::kInt8:
5443         __ movb(field_addr, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
5444         break;
5445       case DataType::Type::kUint16:
5446       case DataType::Type::kInt16: {
5447         int16_t v = CodeGenerator::GetInt16ValueOf(value.GetConstant());
5448         if (byte_swap) {
5449           v = BSWAP(v);
5450         }
5451         __ movw(field_addr, Immediate(v));
5452         break;
5453       }
5454       case DataType::Type::kUint32:
5455       case DataType::Type::kInt32:
5456       case DataType::Type::kFloat32:
5457       case DataType::Type::kReference: {
5458         int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
5459         if (byte_swap) {
5460           v = BSWAP(v);
5461         }
5462         DCHECK_IMPLIES(field_type == DataType::Type::kReference, v == 0);
5463         // Note: if heap poisoning is enabled, no need to poison
5464         // (negate) `v` if it is a reference, as it would be null.
5465         __ movl(field_addr, Immediate(v));
5466         break;
5467       }
5468       case DataType::Type::kUint64:
5469       case DataType::Type::kInt64:
5470       case DataType::Type::kFloat64: {
5471         int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
5472         if (byte_swap) {
5473           v = BSWAP(v);
5474         }
5475         if (is_atomic) {
5476           // Move constant into a register, then atomically store the register to memory.
5477           CpuRegister temp = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5478           __ movq(temp, Immediate(v));
5479           __ movq(field_addr, temp);
5480         } else {
5481           Address field_addr2 = Address::displace(field_addr, sizeof(int32_t));
5482           codegen_->MoveInt64ToAddress(field_addr, field_addr2, v, instruction);
5483         }
5484         maybe_record_implicit_null_check_done = true;
5485         break;
5486       }
5487       case DataType::Type::kVoid:
5488         LOG(FATAL) << "Unreachable type " << field_type;
5489         UNREACHABLE();
5490     }
5491   } else {
5492     if (byte_swap) {
5493       // Swap byte order in-place in the input register (we will restore it later).
5494       CpuRegister temp = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5495       Bswap(value, field_type, &temp);
5496     }
5497 
5498     switch (field_type) {
5499       case DataType::Type::kBool:
5500       case DataType::Type::kUint8:
5501       case DataType::Type::kInt8:
5502         __ movb(field_addr, value.AsRegister<CpuRegister>());
5503         break;
5504       case DataType::Type::kUint16:
5505       case DataType::Type::kInt16:
5506         __ movw(field_addr, value.AsRegister<CpuRegister>());
5507         break;
5508       case DataType::Type::kUint32:
5509       case DataType::Type::kInt32:
5510       case DataType::Type::kReference:
5511         if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
5512           CpuRegister temp = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5513           __ movl(temp, value.AsRegister<CpuRegister>());
5514           __ PoisonHeapReference(temp);
5515           __ movl(field_addr, temp);
5516         } else {
5517           __ movl(field_addr, value.AsRegister<CpuRegister>());
5518         }
5519         break;
5520       case DataType::Type::kUint64:
5521       case DataType::Type::kInt64:
5522         __ movq(field_addr, value.AsRegister<CpuRegister>());
5523         break;
5524       case DataType::Type::kFloat32:
5525         __ movss(field_addr, value.AsFpuRegister<XmmRegister>());
5526         break;
5527       case DataType::Type::kFloat64:
5528         __ movsd(field_addr, value.AsFpuRegister<XmmRegister>());
5529         break;
5530       case DataType::Type::kVoid:
5531         LOG(FATAL) << "Unreachable type " << field_type;
5532         UNREACHABLE();
5533     }
5534 
5535     if (byte_swap) {
5536       // Restore byte order.
5537       CpuRegister temp = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5538       Bswap(value, field_type, &temp);
5539     }
5540   }
5541 
5542   if (!maybe_record_implicit_null_check_done) {
5543     codegen_->MaybeRecordImplicitNullCheck(instruction);
5544   }
5545 
5546   bool needs_write_barrier =
5547       codegen_->StoreNeedsWriteBarrier(field_type, instruction->InputAt(1), write_barrier_kind);
5548   if (needs_write_barrier) {
5549     if (value.IsConstant()) {
5550       DCHECK(value.GetConstant()->IsNullConstant());
5551       if (write_barrier_kind == WriteBarrierKind::kEmitBeingReliedOn) {
5552         DCHECK_NE(extra_temp_index, 0u);
5553         CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
5554         CpuRegister card = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5555         codegen_->MarkGCCard(temp, card, base);
5556       }
5557     } else {
5558       DCHECK_NE(extra_temp_index, 0u);
5559       CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
5560       CpuRegister card = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5561       codegen_->MaybeMarkGCCard(
5562           temp,
5563           card,
5564           base,
5565           value.AsRegister<CpuRegister>(),
5566           value_can_be_null && write_barrier_kind == WriteBarrierKind::kEmitNotBeingReliedOn);
5567     }
5568   } else if (codegen_->ShouldCheckGCCard(
5569                  field_type, instruction->InputAt(value_index), write_barrier_kind)) {
5570     DCHECK_NE(extra_temp_index, 0u);
5571     DCHECK(value.IsRegister());
5572     CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
5573     CpuRegister card = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5574     codegen_->CheckGCCardIsValid(temp, card, base);
5575   }
5576 
5577   if (is_volatile) {
5578     codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
5579   }
5580 }
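// As a sketch, a volatile Int64 field set from a register lowers to approximately:
//
//   <kAnyStore barrier>           // no instruction on x86-64
//   movq [base + offset], value   // a single store keeps the access atomic
//   <kAnyAny barrier>             // full fence, see GenerateMemoryBarrier()
//
// The write-barrier / card-check code at the end only applies to reference-typed values.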
5581 
5582 void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
5583                                                     const FieldInfo& field_info,
5584                                                     bool value_can_be_null,
5585                                                     WriteBarrierKind write_barrier_kind) {
5586   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
5587 
5588   LocationSummary* locations = instruction->GetLocations();
5589   CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>();
5590   bool is_volatile = field_info.IsVolatile();
5591   DataType::Type field_type = field_info.GetFieldType();
5592   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
5593 
5594   HandleFieldSet(instruction,
5595                  /*value_index=*/ 1,
5596                  /*extra_temp_index=*/ 1,
5597                  field_type,
5598                  Address(base, offset),
5599                  base,
5600                  is_volatile,
5601                  /*is_atomic=*/ false,
5602                  value_can_be_null,
5603                  /*byte_swap=*/ false,
5604                  write_barrier_kind);
5605 }
5606 
5607 void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
5608   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind());
5609 }
5610 
5611 void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
5612   HandleFieldSet(instruction,
5613                  instruction->GetFieldInfo(),
5614                  instruction->GetValueCanBeNull(),
5615                  instruction->GetWriteBarrierKind());
5616 }
5617 
5618 void LocationsBuilderX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
5619   HandleFieldGet(instruction);
5620 }
5621 
5622 void InstructionCodeGeneratorX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
5623   HandleFieldGet(instruction, instruction->GetFieldInfo());
5624 }
5625 
5626 void LocationsBuilderX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5627   HandleFieldGet(instruction);
5628 }
5629 
5630 void InstructionCodeGeneratorX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5631   HandleFieldGet(instruction, instruction->GetFieldInfo());
5632 }
5633 
5634 void LocationsBuilderX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5635   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind());
5636 }
5637 
5638 void InstructionCodeGeneratorX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5639   HandleFieldSet(instruction,
5640                  instruction->GetFieldInfo(),
5641                  instruction->GetValueCanBeNull(),
5642                  instruction->GetWriteBarrierKind());
5643 }
5644 
5645 void LocationsBuilderX86_64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
5646   codegen_->CreateStringBuilderAppendLocations(instruction, Location::RegisterLocation(RAX));
5647 }
5648 
5649 void InstructionCodeGeneratorX86_64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
5650   __ movl(CpuRegister(RDI), Immediate(instruction->GetFormat()->GetValue()));
5651   codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc());
5652 }
5653 
5654 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldGet(
5655     HUnresolvedInstanceFieldGet* instruction) {
5656   FieldAccessCallingConventionX86_64 calling_convention;
5657   codegen_->CreateUnresolvedFieldLocationSummary(
5658       instruction, instruction->GetFieldType(), calling_convention);
5659 }
5660 
5661 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldGet(
5662     HUnresolvedInstanceFieldGet* instruction) {
5663   FieldAccessCallingConventionX86_64 calling_convention;
5664   codegen_->GenerateUnresolvedFieldAccess(instruction,
5665                                           instruction->GetFieldType(),
5666                                           instruction->GetFieldIndex(),
5667                                           instruction->GetDexPc(),
5668                                           calling_convention);
5669 }
5670 
5671 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldSet(
5672     HUnresolvedInstanceFieldSet* instruction) {
5673   FieldAccessCallingConventionX86_64 calling_convention;
5674   codegen_->CreateUnresolvedFieldLocationSummary(
5675       instruction, instruction->GetFieldType(), calling_convention);
5676 }
5677 
5678 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldSet(
5679     HUnresolvedInstanceFieldSet* instruction) {
5680   FieldAccessCallingConventionX86_64 calling_convention;
5681   codegen_->GenerateUnresolvedFieldAccess(instruction,
5682                                           instruction->GetFieldType(),
5683                                           instruction->GetFieldIndex(),
5684                                           instruction->GetDexPc(),
5685                                           calling_convention);
5686 }
5687 
5688 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldGet(
5689     HUnresolvedStaticFieldGet* instruction) {
5690   FieldAccessCallingConventionX86_64 calling_convention;
5691   codegen_->CreateUnresolvedFieldLocationSummary(
5692       instruction, instruction->GetFieldType(), calling_convention);
5693 }
5694 
5695 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldGet(
5696     HUnresolvedStaticFieldGet* instruction) {
5697   FieldAccessCallingConventionX86_64 calling_convention;
5698   codegen_->GenerateUnresolvedFieldAccess(instruction,
5699                                           instruction->GetFieldType(),
5700                                           instruction->GetFieldIndex(),
5701                                           instruction->GetDexPc(),
5702                                           calling_convention);
5703 }
5704 
5705 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldSet(
5706     HUnresolvedStaticFieldSet* instruction) {
5707   FieldAccessCallingConventionX86_64 calling_convention;
5708   codegen_->CreateUnresolvedFieldLocationSummary(
5709       instruction, instruction->GetFieldType(), calling_convention);
5710 }
5711 
5712 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldSet(
5713     HUnresolvedStaticFieldSet* instruction) {
5714   FieldAccessCallingConventionX86_64 calling_convention;
5715   codegen_->GenerateUnresolvedFieldAccess(instruction,
5716                                           instruction->GetFieldType(),
5717                                           instruction->GetFieldIndex(),
5718                                           instruction->GetDexPc(),
5719                                           calling_convention);
5720 }
5721 
5722 void LocationsBuilderX86_64::VisitNullCheck(HNullCheck* instruction) {
5723   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
5724   Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks()
5725       ? Location::RequiresRegister()
5726       : Location::Any();
5727   locations->SetInAt(0, loc);
5728 }
5729 
5730 void CodeGeneratorX86_64::GenerateImplicitNullCheck(HNullCheck* instruction) {
5731   if (CanMoveNullCheckToUser(instruction)) {
5732     return;
5733   }
5734   LocationSummary* locations = instruction->GetLocations();
5735   Location obj = locations->InAt(0);
5736 
5737   __ testl(CpuRegister(RAX), Address(obj.AsRegister<CpuRegister>(), 0));
5738   RecordPcInfo(instruction, instruction->GetDexPc());
5739 }
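// The testl above never modifies RAX; it only reads [obj + 0] and sets flags. The point is the
// memory access itself: if `obj` is null the load faults, and the fault handler turns the signal
// into a NullPointerException, so the runtime can map the faulting PC (recorded via
// RecordPcInfo()) back to this null check.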
5740 
5741 void CodeGeneratorX86_64::GenerateExplicitNullCheck(HNullCheck* instruction) {
5742   SlowPathCode* slow_path = new (GetScopedAllocator()) NullCheckSlowPathX86_64(instruction);
5743   AddSlowPath(slow_path);
5744 
5745   LocationSummary* locations = instruction->GetLocations();
5746   Location obj = locations->InAt(0);
5747 
5748   if (obj.IsRegister()) {
5749     __ testl(obj.AsRegister<CpuRegister>(), obj.AsRegister<CpuRegister>());
5750   } else if (obj.IsStackSlot()) {
5751     __ cmpl(Address(CpuRegister(RSP), obj.GetStackIndex()), Immediate(0));
5752   } else {
5753     DCHECK(obj.IsConstant()) << obj;
5754     DCHECK(obj.GetConstant()->IsNullConstant());
5755     __ jmp(slow_path->GetEntryLabel());
5756     return;
5757   }
5758   __ j(kEqual, slow_path->GetEntryLabel());
5759 }
5760 
5761 void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) {
5762   codegen_->GenerateNullCheck(instruction);
5763 }
5764 
5765 void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) {
5766   bool object_array_get_with_read_barrier =
5767       (instruction->GetType() == DataType::Type::kReference) && codegen_->EmitReadBarrier();
5768   LocationSummary* locations =
5769       new (GetGraph()->GetAllocator()) LocationSummary(instruction,
5770                                                        object_array_get_with_read_barrier
5771                                                            ? LocationSummary::kCallOnSlowPath
5772                                                            : LocationSummary::kNoCall);
5773   if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
5774     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
5775   }
5776   locations->SetInAt(0, Location::RequiresRegister());
5777   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5778   if (DataType::IsFloatingPointType(instruction->GetType())) {
5779     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5780   } else {
5781     // The output overlaps for an object array get when read barriers
5782     // are enabled: we do not want the move to overwrite the array's
5783     // location, as we need it to emit the read barrier.
5784     locations->SetOut(
5785         Location::RequiresRegister(),
5786         object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
5787   }
5788 }
5789 
5790 void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
5791   LocationSummary* locations = instruction->GetLocations();
5792   Location obj_loc = locations->InAt(0);
5793   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
5794   Location index = locations->InAt(1);
5795   Location out_loc = locations->Out();
5796   uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
5797 
5798   DataType::Type type = instruction->GetType();
5799   if (type == DataType::Type::kReference) {
5800     static_assert(
5801         sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
5802         "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
5803     // /* HeapReference<Object> */ out =
5804     //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
5805     if (codegen_->EmitBakerReadBarrier()) {
5806       // Note that a potential implicit null check is handled in this
5807       // CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call.
5808       codegen_->GenerateArrayLoadWithBakerReadBarrier(
5809           instruction, out_loc, obj, data_offset, index, /* needs_null_check= */ true);
5810     } else {
5811       CpuRegister out = out_loc.AsRegister<CpuRegister>();
5812       __ movl(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
5813       codegen_->MaybeRecordImplicitNullCheck(instruction);
5814       // If read barriers are enabled, emit read barriers other than
5815       // Baker's using a slow path (and also unpoison the loaded
5816       // reference, if heap poisoning is enabled).
5817       if (index.IsConstant()) {
5818         uint32_t offset =
5819             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
5820         codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
5821       } else {
5822         codegen_->MaybeGenerateReadBarrierSlow(
5823             instruction, out_loc, out_loc, obj_loc, data_offset, index);
5824       }
5825     }
5826   } else {
5827     if (type == DataType::Type::kUint16
5828         && mirror::kUseStringCompression
5829         && instruction->IsStringCharAt()) {
5830       // Branch into the compressed and uncompressed cases, which use different index scales.
5831       CpuRegister out = out_loc.AsRegister<CpuRegister>();
5832       uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
5833       NearLabel done, not_compressed;
5834       __ testb(Address(obj, count_offset), Immediate(1));
5835       codegen_->MaybeRecordImplicitNullCheck(instruction);
5836       static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
5837                     "Expecting 0=compressed, 1=uncompressed");
5838       __ j(kNotZero, &not_compressed);
5839       __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
5840       __ jmp(&done);
5841       __ Bind(&not_compressed);
5842       __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
5843       __ Bind(&done);
5844     } else {
5845       ScaleFactor scale = CodeGenerator::ScaleFactorForType(type);
5846       Address src = CodeGeneratorX86_64::ArrayAddress(obj, index, scale, data_offset);
5847       codegen_->LoadFromMemoryNoReference(type, out_loc, src);
5848     }
5849     codegen_->MaybeRecordImplicitNullCheck(instruction);
5850   }
5851 }
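// Rough sketch of the compressed-string path above for String.charAt(): the count field keeps
// the compression flag in its least significant bit (0 = compressed, per the static_assert), so
//
//   testb [obj + count_offset], 1
//   jnz not_compressed
//   movzxb out, [obj + data_offset + index]        // compressed: 1 byte per char (Latin-1)
//   jmp done
// not_compressed:
//   movzxw out, [obj + data_offset + index * 2]    // uncompressed: 2 bytes per char (UTF-16)
// done: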
5852 
5853 void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) {
5854   DataType::Type value_type = instruction->GetComponentType();
5855 
5856   WriteBarrierKind write_barrier_kind = instruction->GetWriteBarrierKind();
5857   bool needs_write_barrier =
5858       codegen_->StoreNeedsWriteBarrier(value_type, instruction->GetValue(), write_barrier_kind);
5859   bool check_gc_card =
5860       codegen_->ShouldCheckGCCard(value_type, instruction->GetValue(), write_barrier_kind);
5861   bool needs_type_check = instruction->NeedsTypeCheck();
5862 
5863   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5864       instruction,
5865       needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
5866 
5867   locations->SetInAt(0, Location::RequiresRegister());
5868   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5869   if (DataType::IsFloatingPointType(value_type)) {
5870     locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
5871   } else {
5872     locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
5873   }
5874 
5875   if (needs_write_barrier || check_gc_card) {
5876     // Used by reference poisoning, type checking, emitting the write barrier, or checking the
5877     // write barrier (card table).
5878     locations->AddTemp(Location::RequiresRegister());
5879     // Only used when emitting a write barrier or when checking the card table.
5880     locations->AddTemp(Location::RequiresRegister());
5881   } else if ((kPoisonHeapReferences && value_type == DataType::Type::kReference) ||
5882              instruction->NeedsTypeCheck()) {
5883     // Used for poisoning or type checking.
5884     locations->AddTemp(Location::RequiresRegister());
5885   }
5886 }
5887 
5888 void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
5889   LocationSummary* locations = instruction->GetLocations();
5890   Location array_loc = locations->InAt(0);
5891   CpuRegister array = array_loc.AsRegister<CpuRegister>();
5892   Location index = locations->InAt(1);
5893   Location value = locations->InAt(2);
5894   DataType::Type value_type = instruction->GetComponentType();
5895   bool needs_type_check = instruction->NeedsTypeCheck();
5896   const WriteBarrierKind write_barrier_kind = instruction->GetWriteBarrierKind();
5897   bool needs_write_barrier =
5898       codegen_->StoreNeedsWriteBarrier(value_type, instruction->GetValue(), write_barrier_kind);
5899 
5900   switch (value_type) {
5901     case DataType::Type::kBool:
5902     case DataType::Type::kUint8:
5903     case DataType::Type::kInt8: {
5904       uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
5905       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_1, offset);
5906       if (value.IsRegister()) {
5907         __ movb(address, value.AsRegister<CpuRegister>());
5908       } else {
5909         __ movb(address, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
5910       }
5911       codegen_->MaybeRecordImplicitNullCheck(instruction);
5912       break;
5913     }
5914 
5915     case DataType::Type::kUint16:
5916     case DataType::Type::kInt16: {
5917       uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
5918       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_2, offset);
5919       if (value.IsRegister()) {
5920         __ movw(address, value.AsRegister<CpuRegister>());
5921       } else {
5922         DCHECK(value.IsConstant()) << value;
5923         __ movw(address, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
5924       }
5925       codegen_->MaybeRecordImplicitNullCheck(instruction);
5926       break;
5927     }
5928 
5929     case DataType::Type::kReference: {
5930       uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
5931       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
5932 
5933       if (!value.IsRegister()) {
5934         // Just setting null.
5935         DCHECK(instruction->InputAt(2)->IsNullConstant());
5936         DCHECK(value.IsConstant()) << value;
5937         __ movl(address, Immediate(0));
5938         codegen_->MaybeRecordImplicitNullCheck(instruction);
5939         if (write_barrier_kind == WriteBarrierKind::kEmitBeingReliedOn) {
5940           // We need to set a write barrier here even though we are writing null, since this write
5941           // barrier is being relied on.
5942           DCHECK(needs_write_barrier);
5943           CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
5944           CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
5945           codegen_->MarkGCCard(temp, card, array);
5946         }
5947         DCHECK(!needs_type_check);
5948         break;
5949       }
5950 
5951       CpuRegister register_value = value.AsRegister<CpuRegister>();
5952       const bool can_value_be_null = instruction->GetValueCanBeNull();
5953       // For WriteBarrierKind::kEmitNotBeingReliedOn we can skip the write barrier when the
5954       // stored value is null (without an extra CompareAndBranchIfZero, since we already check
5955       // the value for null as part of the type check).
5956       const bool skip_marking_gc_card =
5957           can_value_be_null && write_barrier_kind == WriteBarrierKind::kEmitNotBeingReliedOn;
5958       NearLabel do_store;
5959       NearLabel skip_writing_card;
5960       if (can_value_be_null) {
5961         __ testl(register_value, register_value);
5962         if (skip_marking_gc_card) {
5963           __ j(kEqual, &skip_writing_card);
5964         } else {
5965           __ j(kEqual, &do_store);
5966         }
5967       }
5968 
5969       SlowPathCode* slow_path = nullptr;
5970       if (needs_type_check) {
5971         slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathX86_64(instruction);
5972         codegen_->AddSlowPath(slow_path);
5973 
5974         const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
5975         const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
5976         const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
5977 
5978         // Note that when Baker read barriers are enabled, the type
5979         // checks are performed without read barriers.  This is fine,
5980         // even in the case where a class object is in the from-space
5981         // after the flip, as a comparison involving such a type would
5982         // not produce a false positive; it may of course produce a
5983         // false negative, in which case we would take the ArraySet
5984         // slow path.
5985 
5986         CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
5987         // /* HeapReference<Class> */ temp = array->klass_
5988         __ movl(temp, Address(array, class_offset));
5989         codegen_->MaybeRecordImplicitNullCheck(instruction);
5990         __ MaybeUnpoisonHeapReference(temp);
5991 
5992         // /* HeapReference<Class> */ temp = temp->component_type_
5993         __ movl(temp, Address(temp, component_offset));
5994         // If heap poisoning is enabled, no need to unpoison `temp`
5995         // nor the object reference in `register_value->klass`, as
5996         // we are comparing two poisoned references.
5997         __ cmpl(temp, Address(register_value, class_offset));
5998 
5999         if (instruction->StaticTypeOfArrayIsObjectArray()) {
6000           NearLabel do_put;
6001           __ j(kEqual, &do_put);
6002           // If heap poisoning is enabled, the `temp` reference has
6003           // not been unpoisoned yet; unpoison it now.
6004           __ MaybeUnpoisonHeapReference(temp);
6005 
6006           // If heap poisoning is enabled, no need to unpoison the
6007           // heap reference loaded below, as it is only used for a
6008           // comparison with null.
6009           __ cmpl(Address(temp, super_offset), Immediate(0));
6010           __ j(kNotEqual, slow_path->GetEntryLabel());
6011           __ Bind(&do_put);
6012         } else {
6013           __ j(kNotEqual, slow_path->GetEntryLabel());
6014         }
6015       }
6016 
6017       if (can_value_be_null && !skip_marking_gc_card) {
6018         DCHECK(do_store.IsLinked());
6019         __ Bind(&do_store);
6020       }
6021 
6022       if (needs_write_barrier) {
6023         CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
6024         CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
6025         codegen_->MarkGCCard(temp, card, array);
6026       } else if (codegen_->ShouldCheckGCCard(
6027                      value_type, instruction->GetValue(), write_barrier_kind)) {
6028         CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
6029         CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
6030         codegen_->CheckGCCardIsValid(temp, card, array);
6031       }
6032 
6033       if (skip_marking_gc_card) {
6034         // Note that we don't check that the GC card is valid, as it may legitimately be clean.
6035         DCHECK(skip_writing_card.IsLinked());
6036         __ Bind(&skip_writing_card);
6037       }
6038 
6039       Location source = value;
6040       if (kPoisonHeapReferences) {
6041         Location temp_loc = locations->GetTemp(0);
6042         CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
6043         __ movl(temp, register_value);
6044         __ PoisonHeapReference(temp);
6045         source = temp_loc;
6046       }
6047 
6048       __ movl(address, source.AsRegister<CpuRegister>());
6049 
6050       if (can_value_be_null || !needs_type_check) {
6051         codegen_->MaybeRecordImplicitNullCheck(instruction);
6052       }
6053 
6054       if (slow_path != nullptr) {
6055         __ Bind(slow_path->GetExitLabel());
6056       }
6057 
6058       break;
6059     }
6060 
6061     case DataType::Type::kInt32: {
6062       uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
6063       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
6064       if (value.IsRegister()) {
6065         __ movl(address, value.AsRegister<CpuRegister>());
6066       } else {
6067         DCHECK(value.IsConstant()) << value;
6068         int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
6069         __ movl(address, Immediate(v));
6070       }
6071       codegen_->MaybeRecordImplicitNullCheck(instruction);
6072       break;
6073     }
6074 
6075     case DataType::Type::kInt64: {
6076       uint32_t offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
6077       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset);
6078       if (value.IsRegister()) {
6079         __ movq(address, value.AsRegister<CpuRegister>());
6080         codegen_->MaybeRecordImplicitNullCheck(instruction);
6081       } else {
6082         int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
6083         Address address_high =
6084             CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t));
6085         codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
6086       }
6087       break;
6088     }
6089 
6090     case DataType::Type::kFloat32: {
6091       uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
6092       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
6093       if (value.IsFpuRegister()) {
6094         __ movss(address, value.AsFpuRegister<XmmRegister>());
6095       } else {
6096         DCHECK(value.IsConstant());
6097         int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
6098         __ movl(address, Immediate(v));
6099       }
6100       codegen_->MaybeRecordImplicitNullCheck(instruction);
6101       break;
6102     }
6103 
6104     case DataType::Type::kFloat64: {
6105       uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
6106       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset);
6107       if (value.IsFpuRegister()) {
6108         __ movsd(address, value.AsFpuRegister<XmmRegister>());
6109         codegen_->MaybeRecordImplicitNullCheck(instruction);
6110       } else {
6111         int64_t v = bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
6112         Address address_high =
6113             CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t));
6114         codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
6115       }
6116       break;
6117     }
6118 
6119     case DataType::Type::kUint32:
6120     case DataType::Type::kUint64:
6121     case DataType::Type::kVoid:
6122       LOG(FATAL) << "Unreachable type " << instruction->GetType();
6123       UNREACHABLE();
6124   }
6125 }
6126 
6127 void LocationsBuilderX86_64::VisitArrayLength(HArrayLength* instruction) {
6128   LocationSummary* locations =
6129       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
6130   locations->SetInAt(0, Location::RequiresRegister());
6131   if (!instruction->IsEmittedAtUseSite()) {
6132     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6133   }
6134 }
6135 
6136 void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction) {
6137   if (instruction->IsEmittedAtUseSite()) {
6138     return;
6139   }
6140 
6141   LocationSummary* locations = instruction->GetLocations();
6142   uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
6143   CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
6144   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
6145   __ movl(out, Address(obj, offset));
6146   codegen_->MaybeRecordImplicitNullCheck(instruction);
6147   // Shift out the compression flag (bit 0 of the count) in case the array is a String's array of char.
6148   if (mirror::kUseStringCompression && instruction->IsStringLength()) {
6149     __ shrl(out, Immediate(1));
6150   }
6151 }
6152 
6153 void LocationsBuilderX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
6154   RegisterSet caller_saves = RegisterSet::Empty();
6155   InvokeRuntimeCallingConvention calling_convention;
6156   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6157   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
6158   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
6159   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
6160   HInstruction* length = instruction->InputAt(1);
6161   if (!length->IsEmittedAtUseSite()) {
6162     locations->SetInAt(1, Location::RegisterOrConstant(length));
6163   }
6164 }
6165 
6166 void InstructionCodeGeneratorX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
6167   LocationSummary* locations = instruction->GetLocations();
6168   Location index_loc = locations->InAt(0);
6169   Location length_loc = locations->InAt(1);
6170   SlowPathCode* slow_path =
6171       new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathX86_64(instruction);
6172 
6173   if (length_loc.IsConstant()) {
6174     int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
6175     if (index_loc.IsConstant()) {
6176       // BCE will remove the bounds check if we are guaranteed to pass.
6177       int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
6178       if (index < 0 || index >= length) {
6179         codegen_->AddSlowPath(slow_path);
6180         __ jmp(slow_path->GetEntryLabel());
6181       } else {
6182         // Some optimization after BCE may have generated this, and we should not
6183         // generate a bounds check for an index that is statically known to be in range.
6184       }
6185       return;
6186     }
6187 
6188     // The length is the constant, so it must be the immediate operand and the jump condition is reversed.
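    // kAboveEqual is an unsigned comparison, so a negative index (seen as a large unsigned value)
    // is also diverted to the slow path.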
6189     CpuRegister index_reg = index_loc.AsRegister<CpuRegister>();
6190     __ cmpl(index_reg, Immediate(length));
6191     codegen_->AddSlowPath(slow_path);
6192     __ j(kAboveEqual, slow_path->GetEntryLabel());
6193   } else {
6194     HInstruction* array_length = instruction->InputAt(1);
6195     if (array_length->IsEmittedAtUseSite()) {
6196       // Address the length field in the array.
6197       DCHECK(array_length->IsArrayLength());
6198       uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
6199       Location array_loc = array_length->GetLocations()->InAt(0);
6200       Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
6201       if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
6202         // TODO: if index_loc.IsConstant(), compare twice the index (to compensate for
6203         // the string compression flag) with the in-memory length and avoid the temporary.
6204         CpuRegister length_reg = CpuRegister(TMP);
6205         __ movl(length_reg, array_len);
6206         codegen_->MaybeRecordImplicitNullCheck(array_length);
6207         __ shrl(length_reg, Immediate(1));
6208         codegen_->GenerateIntCompare(length_reg, index_loc);
6209       } else {
6210         // Check the bound for the general case: an array of char, or a String's char array
6211         // when the compression feature is off.
6212         if (index_loc.IsConstant()) {
6213           int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
6214           __ cmpl(array_len, Immediate(value));
6215         } else {
6216           __ cmpl(array_len, index_loc.AsRegister<CpuRegister>());
6217         }
6218         codegen_->MaybeRecordImplicitNullCheck(array_length);
6219       }
6220     } else {
6221       codegen_->GenerateIntCompare(length_loc, index_loc);
6222     }
6223     codegen_->AddSlowPath(slow_path);
6224     __ j(kBelowEqual, slow_path->GetEntryLabel());
6225   }
6226 }
6227 
6228 void CodeGeneratorX86_64::MaybeMarkGCCard(CpuRegister temp,
6229                                           CpuRegister card,
6230                                           CpuRegister object,
6231                                           CpuRegister value,
6232                                           bool emit_null_check) {
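  // Storing a null reference cannot introduce a reference the GC needs to trace through the card
  // table, so when the value may be null we test it and skip dirtying the card.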
6233   NearLabel is_null;
6234   if (emit_null_check) {
6235     __ testl(value, value);
6236     __ j(kEqual, &is_null);
6237   }
6238   MarkGCCard(temp, card, object);
6239   if (emit_null_check) {
6240     __ Bind(&is_null);
6241   }
6242 }
6243 
6244 void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp, CpuRegister card, CpuRegister object) {
6245   // Load the address of the card table into `card`.
6246   __ gs()->movq(card,
6247                 Address::Absolute(Thread::CardTableOffset<kX86_64PointerSize>().Int32Value(),
6248                                   /* no_rip= */ true));
6249   // Calculate the offset (in the card table) of the card corresponding to `object`.
6250   __ movq(temp, object);
6251   __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift));
6252   // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
6253   // `object`'s card.
6254   //
6255   // Register `card` contains the address of the card table. Note that the card
6256   // table's base is biased during its creation so that it always starts at an
6257   // address whose least-significant byte is equal to `kCardDirty` (see
6258   // art::gc::accounting::CardTable::Create). Therefore the MOVB instruction
6259   // below writes the `kCardDirty` (byte) value into the `object`'s card
6260   // (located at `card + object >> kCardShift`).
6261   //
6262   // This dual use of the value in register `card` (1. to calculate the location
6263   // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
6264   // (no need to explicitly load `kCardDirty` as an immediate value).
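  // In effect, the store below performs:
  //   card_table_base[object >> kCardShift] = static_cast<uint8_t>(card_table_base);  // == kCardDirty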
6265   __ movb(Address(temp, card, TIMES_1, 0), card);
6266 }
6267 
6268 void CodeGeneratorX86_64::CheckGCCardIsValid(CpuRegister temp,
6269                                              CpuRegister card,
6270                                              CpuRegister object) {
6271   NearLabel done;
6272   // Load the address of the card table into `card`.
6273   __ gs()->movq(card,
6274                 Address::Absolute(Thread::CardTableOffset<kX86_64PointerSize>().Int32Value(),
6275                                   /* no_rip= */ true));
6276   // Calculate the offset (in the card table) of the card corresponding to `object`.
6277   __ movq(temp, object);
6278   __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift));
6279   // assert (!clean || !self->is_gc_marking)
6280   __ cmpb(Address(temp, card, TIMES_1, 0), Immediate(gc::accounting::CardTable::kCardClean));
6281   __ j(kNotEqual, &done);
6282   __ gs()->cmpl(
6283       Address::Absolute(Thread::IsGcMarkingOffset<kX86_64PointerSize>(), /* no_rip= */ true),
6284       Immediate(0));
6285   __ j(kEqual, &done);
6286   __ int3();
6287   __ Bind(&done);
6288 }
6289 
6290 void LocationsBuilderX86_64::VisitParallelMove([[maybe_unused]] HParallelMove* instruction) {
6291   LOG(FATAL) << "Unimplemented";
6292 }
6293 
6294 void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instruction) {
6295   if (instruction->GetNext()->IsSuspendCheck() &&
6296       instruction->GetBlock()->GetLoopInformation() != nullptr) {
6297     HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
6298     // The back edge will generate the suspend check.
6299     codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
6300   }
6301 
6302   codegen_->GetMoveResolver()->EmitNativeCode(instruction);
6303 }
6304 
6305 void LocationsBuilderX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
6306   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6307       instruction, LocationSummary::kCallOnSlowPath);
6308   // In suspend check slow path, usually there are no caller-save registers at all.
6309   // If SIMD instructions are present, however, we force spilling all live SIMD
6310   // registers in full width (since the runtime only saves/restores lower part).
6311   locations->SetCustomSlowPathCallerSaves(
6312       GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
6313 }
6314 
6315 void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
6316   HBasicBlock* block = instruction->GetBlock();
6317   if (block->GetLoopInformation() != nullptr) {
6318     DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
6319     // The back edge will generate the suspend check.
6320     return;
6321   }
6322   if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
6323     // The goto will generate the suspend check.
6324     return;
6325   }
6326   GenerateSuspendCheck(instruction, nullptr);
6327 }
6328 
6329 void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruction,
6330                                                           HBasicBlock* successor) {
6331   SuspendCheckSlowPathX86_64* slow_path =
6332       down_cast<SuspendCheckSlowPathX86_64*>(instruction->GetSlowPath());
6333   if (slow_path == nullptr) {
6334     slow_path =
6335         new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathX86_64(instruction, successor);
6336     instruction->SetSlowPath(slow_path);
6337     codegen_->AddSlowPath(slow_path);
6338     if (successor != nullptr) {
6339       DCHECK(successor->IsLoopHeader());
6340     }
6341   } else {
6342     DCHECK_EQ(slow_path->GetSuccessor(), successor);
6343   }
6344 
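  // Test the thread's state-and-flags field: any pending suspend or checkpoint request bit routes
  // execution to the slow path.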
6345   __ gs()->testl(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64PointerSize>().Int32Value(),
6346                                    /* no_rip= */ true),
6347                  Immediate(Thread::SuspendOrCheckpointRequestFlags()));
6348   if (successor == nullptr) {
6349     __ j(kNotZero, slow_path->GetEntryLabel());
6350     __ Bind(slow_path->GetReturnLabel());
6351   } else {
6352     __ j(kZero, codegen_->GetLabelOf(successor));
6353     __ jmp(slow_path->GetEntryLabel());
6354   }
6355 }
6356 
6357 X86_64Assembler* ParallelMoveResolverX86_64::GetAssembler() const {
6358   return codegen_->GetAssembler();
6359 }
6360 
6361 void ParallelMoveResolverX86_64::EmitMove(size_t index) {
6362   MoveOperands* move = moves_[index];
6363   Location source = move->GetSource();
6364   Location destination = move->GetDestination();
6365 
6366   if (source.IsRegister()) {
6367     if (destination.IsRegister()) {
6368       __ movq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
6369     } else if (destination.IsStackSlot()) {
6370       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
6371               source.AsRegister<CpuRegister>());
6372     } else {
6373       DCHECK(destination.IsDoubleStackSlot());
6374       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
6375               source.AsRegister<CpuRegister>());
6376     }
6377   } else if (source.IsStackSlot()) {
6378     if (destination.IsRegister()) {
6379       __ movl(destination.AsRegister<CpuRegister>(),
6380               Address(CpuRegister(RSP), source.GetStackIndex()));
6381     } else if (destination.IsFpuRegister()) {
6382       __ movss(destination.AsFpuRegister<XmmRegister>(),
6383               Address(CpuRegister(RSP), source.GetStackIndex()));
6384     } else {
6385       DCHECK(destination.IsStackSlot());
6386       __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
6387       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
6388     }
6389   } else if (source.IsDoubleStackSlot()) {
6390     if (destination.IsRegister()) {
6391       __ movq(destination.AsRegister<CpuRegister>(),
6392               Address(CpuRegister(RSP), source.GetStackIndex()));
6393     } else if (destination.IsFpuRegister()) {
6394       __ movsd(destination.AsFpuRegister<XmmRegister>(),
6395                Address(CpuRegister(RSP), source.GetStackIndex()));
6396     } else {
6397       DCHECK(destination.IsDoubleStackSlot()) << destination;
6398       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
6399       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
6400     }
6401   } else if (source.IsSIMDStackSlot()) {
6402     if (destination.IsFpuRegister()) {
6403       __ movups(destination.AsFpuRegister<XmmRegister>(),
6404                 Address(CpuRegister(RSP), source.GetStackIndex()));
6405     } else {
6406       DCHECK(destination.IsSIMDStackSlot());
6407       size_t high = kX86_64WordSize;
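      // Copy the 128-bit SIMD slot as two 64-bit halves through the TMP core register.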
6408       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
6409       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
6410       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex() + high));
6411       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex() + high), CpuRegister(TMP));
6412     }
6413   } else if (source.IsConstant()) {
6414     HConstant* constant = source.GetConstant();
6415     if (constant->IsIntConstant() || constant->IsNullConstant()) {
6416       int32_t value = CodeGenerator::GetInt32ValueOf(constant);
6417       if (destination.IsRegister()) {
6418         if (value == 0) {
6419           __ xorl(destination.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
6420         } else {
6421           __ movl(destination.AsRegister<CpuRegister>(), Immediate(value));
6422         }
6423       } else {
6424         DCHECK(destination.IsStackSlot()) << destination;
6425         __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
6426       }
6427     } else if (constant->IsLongConstant()) {
6428       int64_t value = constant->AsLongConstant()->GetValue();
6429       if (destination.IsRegister()) {
6430         codegen_->Load64BitValue(destination.AsRegister<CpuRegister>(), value);
6431       } else {
6432         DCHECK(destination.IsDoubleStackSlot()) << destination;
6433         codegen_->Store64BitValueToStack(destination, value);
6434       }
6435     } else if (constant->IsFloatConstant()) {
6436       float fp_value = constant->AsFloatConstant()->GetValue();
6437       if (destination.IsFpuRegister()) {
6438         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
6439         codegen_->Load32BitValue(dest, fp_value);
6440       } else {
6441         DCHECK(destination.IsStackSlot()) << destination;
6442         Immediate imm(bit_cast<int32_t, float>(fp_value));
6443         __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
6444       }
6445     } else {
6446       DCHECK(constant->IsDoubleConstant()) << constant->DebugName();
6447       double fp_value = constant->AsDoubleConstant()->GetValue();
6448       int64_t value = bit_cast<int64_t, double>(fp_value);
6449       if (destination.IsFpuRegister()) {
6450         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
6451         codegen_->Load64BitValue(dest, fp_value);
6452       } else {
6453         DCHECK(destination.IsDoubleStackSlot()) << destination;
6454         codegen_->Store64BitValueToStack(destination, value);
6455       }
6456     }
6457   } else if (source.IsFpuRegister()) {
6458     if (destination.IsFpuRegister()) {
6459       __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
6460     } else if (destination.IsStackSlot()) {
6461       __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
6462                source.AsFpuRegister<XmmRegister>());
6463     } else if (destination.IsDoubleStackSlot()) {
6464       __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
6465                source.AsFpuRegister<XmmRegister>());
6466     } else {
6467       DCHECK(destination.IsSIMDStackSlot());
6468       __ movups(Address(CpuRegister(RSP), destination.GetStackIndex()),
6469                 source.AsFpuRegister<XmmRegister>());
6470     }
6471   }
6472 }
6473 
6474 void ParallelMoveResolverX86_64::Exchange32(CpuRegister reg, int mem) {
6475   __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
6476   __ movl(Address(CpuRegister(RSP), mem), reg);
6477   __ movl(reg, CpuRegister(TMP));
6478 }
6479 
6480 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg1, CpuRegister reg2) {
6481   __ movq(CpuRegister(TMP), reg1);
6482   __ movq(reg1, reg2);
6483   __ movq(reg2, CpuRegister(TMP));
6484 }
6485 
6486 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) {
6487   __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
6488   __ movq(Address(CpuRegister(RSP), mem), reg);
6489   __ movq(reg, CpuRegister(TMP));
6490 }
6491 
6492 void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) {
6493   __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
6494   __ movss(Address(CpuRegister(RSP), mem), reg);
6495   __ movd(reg, CpuRegister(TMP));
6496 }
6497 
6498 void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) {
6499   __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
6500   __ movsd(Address(CpuRegister(RSP), mem), reg);
6501   __ movd(reg, CpuRegister(TMP));
6502 }
6503 
6504 void ParallelMoveResolverX86_64::Exchange128(XmmRegister reg, int mem) {
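  // Spill the XMM register to a fresh 16-byte scratch area on the stack, swap that area with the
  // SIMD stack slot (whose offset is rebased past the scratch area), then reload the register.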
6505   size_t extra_slot = 2 * kX86_64WordSize;
6506   __ subq(CpuRegister(RSP), Immediate(extra_slot));
6507   __ movups(Address(CpuRegister(RSP), 0), XmmRegister(reg));
6508   ExchangeMemory64(0, mem + extra_slot, 2);
6509   __ movups(XmmRegister(reg), Address(CpuRegister(RSP), 0));
6510   __ addq(CpuRegister(RSP), Immediate(extra_slot));
6511 }
6512 
6513 void ParallelMoveResolverX86_64::ExchangeMemory32(int mem1, int mem2) {
6514   ScratchRegisterScope ensure_scratch(
6515       this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
6516 
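  // If the scratch register had to be spilled, SpillScratch() pushed it and moved RSP down by one
  // word, so the RSP-relative offsets below must be adjusted accordingly.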
6517   int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
6518   __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
6519   __ movl(CpuRegister(ensure_scratch.GetRegister()),
6520           Address(CpuRegister(RSP), mem2 + stack_offset));
6521   __ movl(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
6522   __ movl(Address(CpuRegister(RSP), mem1 + stack_offset),
6523           CpuRegister(ensure_scratch.GetRegister()));
6524 }
6525 
6526 void ParallelMoveResolverX86_64::ExchangeMemory64(int mem1, int mem2, int num_of_qwords) {
6527   ScratchRegisterScope ensure_scratch(
6528       this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
6529 
6530   int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
6531 
6532   // Now that temp registers are available (possibly spilled), exchange blocks of memory.
6533   for (int i = 0; i < num_of_qwords; i++) {
6534     __ movq(CpuRegister(TMP),
6535             Address(CpuRegister(RSP), mem1 + stack_offset));
6536     __ movq(CpuRegister(ensure_scratch.GetRegister()),
6537             Address(CpuRegister(RSP), mem2 + stack_offset));
6538     __ movq(Address(CpuRegister(RSP), mem2 + stack_offset),
6539             CpuRegister(TMP));
6540     __ movq(Address(CpuRegister(RSP), mem1 + stack_offset),
6541             CpuRegister(ensure_scratch.GetRegister()));
6542     stack_offset += kX86_64WordSize;
6543   }
6544 }
6545 
6546 void ParallelMoveResolverX86_64::EmitSwap(size_t index) {
6547   MoveOperands* move = moves_[index];
6548   Location source = move->GetSource();
6549   Location destination = move->GetDestination();
6550 
6551   if (source.IsRegister() && destination.IsRegister()) {
6552     Exchange64(source.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
6553   } else if (source.IsRegister() && destination.IsStackSlot()) {
6554     Exchange32(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
6555   } else if (source.IsStackSlot() && destination.IsRegister()) {
6556     Exchange32(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
6557   } else if (source.IsStackSlot() && destination.IsStackSlot()) {
6558     ExchangeMemory32(destination.GetStackIndex(), source.GetStackIndex());
6559   } else if (source.IsRegister() && destination.IsDoubleStackSlot()) {
6560     Exchange64(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
6561   } else if (source.IsDoubleStackSlot() && destination.IsRegister()) {
6562     Exchange64(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
6563   } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
6564     ExchangeMemory64(destination.GetStackIndex(), source.GetStackIndex(), 1);
6565   } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
6566     __ movd(CpuRegister(TMP), source.AsFpuRegister<XmmRegister>());
6567     __ movaps(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>());
6568     __ movd(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP));
6569   } else if (source.IsFpuRegister() && destination.IsStackSlot()) {
6570     Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
6571   } else if (source.IsStackSlot() && destination.IsFpuRegister()) {
6572     Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
6573   } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) {
6574     Exchange64(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
6575   } else if (source.IsDoubleStackSlot() && destination.IsFpuRegister()) {
6576     Exchange64(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
6577   } else if (source.IsSIMDStackSlot() && destination.IsSIMDStackSlot()) {
6578     ExchangeMemory64(destination.GetStackIndex(), source.GetStackIndex(), 2);
6579   } else if (source.IsFpuRegister() && destination.IsSIMDStackSlot()) {
6580     Exchange128(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
6581   } else if (destination.IsFpuRegister() && source.IsSIMDStackSlot()) {
6582     Exchange128(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
6583   } else {
6584     LOG(FATAL) << "Unimplemented swap between " << source << " and " << destination;
6585   }
6586 }
6587 
6588 
6589 void ParallelMoveResolverX86_64::SpillScratch(int reg) {
6590   __ pushq(CpuRegister(reg));
6591 }
6592 
6593 
6594 void ParallelMoveResolverX86_64::RestoreScratch(int reg) {
6595   __ popq(CpuRegister(reg));
6596 }
6597 
6598 void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck(
6599     SlowPathCode* slow_path, CpuRegister class_reg) {
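  // The class status bits live in the most significant bits of the 32-bit status word and
  // `kVisiblyInitialized` is the highest status value, so a single unsigned byte compare with
  // kBelow catches every class that is not yet visibly initialized.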
6600   __ cmpb(Address(class_reg, kClassStatusByteOffset), Immediate(kShiftedVisiblyInitializedValue));
6601   __ j(kBelow, slow_path->GetEntryLabel());
6602   __ Bind(slow_path->GetExitLabel());
6603 }
6604 
6605 void InstructionCodeGeneratorX86_64::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
6606                                                                        CpuRegister temp) {
6607   uint32_t path_to_root = check->GetBitstringPathToRoot();
6608   uint32_t mask = check->GetBitstringMask();
6609   DCHECK(IsPowerOfTwo(mask + 1));
6610   size_t mask_bits = WhichPowerOf2(mask + 1);
6611 
6612   if (mask_bits == 16u) {
6613     // Compare the bitstring in memory.
6614     __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root));
6615   } else {
6616     // /* uint32_t */ temp = temp->status_
6617     __ movl(temp, Address(temp, mirror::Class::StatusOffset()));
6618     // Compare the bitstring bits using SUB.
6619     __ subl(temp, Immediate(path_to_root));
6620     // Shift out bits that do not contribute to the comparison.
6621     __ shll(temp, Immediate(32u - mask_bits));
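    // After the SUB, the low `mask_bits` bits of `temp` are zero iff they matched `path_to_root`;
    // the SHL discards the unrelated upper bits, so the caller's zero-flag check tests exactly the
    // masked bits.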
6622   }
6623 }
6624 
6625 HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind(
6626     HLoadClass::LoadKind desired_class_load_kind) {
6627   switch (desired_class_load_kind) {
6628     case HLoadClass::LoadKind::kInvalid:
6629       LOG(FATAL) << "UNREACHABLE";
6630       UNREACHABLE();
6631     case HLoadClass::LoadKind::kReferrersClass:
6632       break;
6633     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
6634     case HLoadClass::LoadKind::kBootImageRelRo:
6635     case HLoadClass::LoadKind::kAppImageRelRo:
6636     case HLoadClass::LoadKind::kBssEntry:
6637     case HLoadClass::LoadKind::kBssEntryPublic:
6638     case HLoadClass::LoadKind::kBssEntryPackage:
6639       DCHECK(!GetCompilerOptions().IsJitCompiler());
6640       break;
6641     case HLoadClass::LoadKind::kJitBootImageAddress:
6642     case HLoadClass::LoadKind::kJitTableAddress:
6643       DCHECK(GetCompilerOptions().IsJitCompiler());
6644       break;
6645     case HLoadClass::LoadKind::kRuntimeCall:
6646       break;
6647   }
6648   return desired_class_load_kind;
6649 }
6650 
6651 void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
6652   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
6653   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
6654     // Custom calling convention: RAX serves as both input and output.
6655     CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
6656         cls,
6657         Location::RegisterLocation(RAX),
6658         Location::RegisterLocation(RAX));
6659     return;
6660   }
6661   DCHECK_EQ(cls->NeedsAccessCheck(),
6662             load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
6663                 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
6664 
6665   const bool requires_read_barrier = !cls->IsInImage() && codegen_->EmitReadBarrier();
6666   LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
6667       ? LocationSummary::kCallOnSlowPath
6668       : LocationSummary::kNoCall;
6669   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind);
6670   if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
6671     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
6672   }
6673 
6674   if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
6675     locations->SetInAt(0, Location::RequiresRegister());
6676   }
6677   locations->SetOut(Location::RequiresRegister());
6678   if (load_kind == HLoadClass::LoadKind::kBssEntry ||
6679       load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
6680       load_kind == HLoadClass::LoadKind::kBssEntryPackage) {
6681     if (codegen_->EmitNonBakerReadBarrier()) {
6682       // For non-Baker read barrier we have a temp-clobbering call.
6683     } else {
6684       // Rely on the type resolution and/or initialization to save everything.
6685       locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6686     }
6687   }
6688 }
6689 
6690 Label* CodeGeneratorX86_64::NewJitRootClassPatch(const DexFile& dex_file,
6691                                                  dex::TypeIndex type_index,
6692                                                  Handle<mirror::Class> handle) {
6693   ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
6694   // Add a patch entry and return the label.
6695   jit_class_patches_.emplace_back(&dex_file, type_index.index_);
6696   PatchInfo<Label>* info = &jit_class_patches_.back();
6697   return &info->label;
6698 }
6699 
6700 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
6701 // move.
6702 void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
6703   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
6704   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
6705     codegen_->GenerateLoadClassRuntimeCall(cls);
6706     return;
6707   }
6708   DCHECK_EQ(cls->NeedsAccessCheck(),
6709             load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
6710                 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
6711 
6712   LocationSummary* locations = cls->GetLocations();
6713   Location out_loc = locations->Out();
6714   CpuRegister out = out_loc.AsRegister<CpuRegister>();
6715 
6716   const ReadBarrierOption read_barrier_option =
6717       cls->IsInImage() ? kWithoutReadBarrier : codegen_->GetCompilerReadBarrierOption();
6718   bool generate_null_check = false;
6719   switch (load_kind) {
6720     case HLoadClass::LoadKind::kReferrersClass: {
6721       DCHECK(!cls->CanCallRuntime());
6722       DCHECK(!cls->MustGenerateClinitCheck());
6723       // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
6724       CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
6725       GenerateGcRootFieldLoad(
6726           cls,
6727           out_loc,
6728           Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()),
6729           /* fixup_label= */ nullptr,
6730           read_barrier_option);
6731       break;
6732     }
6733     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
6734       DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
6735              codegen_->GetCompilerOptions().IsBootImageExtension());
6736       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
6737       __ leal(out,
6738               Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
6739       codegen_->RecordBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
6740       break;
6741     case HLoadClass::LoadKind::kBootImageRelRo: {
6742       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
6743       __ movl(out,
6744               Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
6745       codegen_->RecordBootImageRelRoPatch(CodeGenerator::GetBootImageOffset(cls));
6746       break;
6747     }
6748     case HLoadClass::LoadKind::kAppImageRelRo: {
6749       DCHECK(codegen_->GetCompilerOptions().IsAppImage());
6750       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
6751       __ movl(out,
6752               Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
6753       codegen_->RecordAppImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
6754       break;
6755     }
6756     case HLoadClass::LoadKind::kBssEntry:
6757     case HLoadClass::LoadKind::kBssEntryPublic:
6758     case HLoadClass::LoadKind::kBssEntryPackage: {
6759       Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
6760                                           /* no_rip= */ false);
6761       Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls);
6762       // /* GcRoot<mirror::Class> */ out = *address  /* PC-relative */
6763       GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
6764       // No need for memory fence, thanks to the x86-64 memory model.
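      // The .bss slot is initially null; a null result means the type has not been resolved yet
      // and the slow path below must call into the runtime.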
6765       generate_null_check = true;
6766       break;
6767     }
6768     case HLoadClass::LoadKind::kJitBootImageAddress: {
6769       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
6770       uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get());
6771       DCHECK_NE(address, 0u);
6772       __ movl(out, Immediate(static_cast<int32_t>(address)));  // Zero-extended.
6773       break;
6774     }
6775     case HLoadClass::LoadKind::kJitTableAddress: {
6776       Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
6777                                           /* no_rip= */ true);
6778       Label* fixup_label =
6779           codegen_->NewJitRootClassPatch(cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass());
6780       // /* GcRoot<mirror::Class> */ out = *address
6781       GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
6782       break;
6783     }
6784     default:
6785       LOG(FATAL) << "Unexpected load kind: " << cls->GetLoadKind();
6786       UNREACHABLE();
6787   }
6788 
6789   if (generate_null_check || cls->MustGenerateClinitCheck()) {
6790     DCHECK(cls->CanCallRuntime());
6791     SlowPathCode* slow_path =
6792         new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64(cls, cls);
6793     codegen_->AddSlowPath(slow_path);
6794     if (generate_null_check) {
6795       __ testl(out, out);
6796       __ j(kEqual, slow_path->GetEntryLabel());
6797     }
6798     if (cls->MustGenerateClinitCheck()) {
6799       GenerateClassInitializationCheck(slow_path, out);
6800     } else {
6801       __ Bind(slow_path->GetExitLabel());
6802     }
6803   }
6804 }
6805 
6806 void LocationsBuilderX86_64::VisitClinitCheck(HClinitCheck* check) {
6807   LocationSummary* locations =
6808       new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
6809   locations->SetInAt(0, Location::RequiresRegister());
6810   if (check->HasUses()) {
6811     locations->SetOut(Location::SameAsFirstInput());
6812   }
6813   // Rely on the type initialization to save everything we need.
6814   locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6815 }
6816 
6817 void LocationsBuilderX86_64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
6818   // Custom calling convention: RAX serves as both input and output.
6819   Location location = Location::RegisterLocation(RAX);
6820   CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location);
6821 }
6822 
6823 void InstructionCodeGeneratorX86_64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
6824   codegen_->GenerateLoadMethodHandleRuntimeCall(load);
6825 }
6826 
6827 void LocationsBuilderX86_64::VisitLoadMethodType(HLoadMethodType* load) {
6828   LocationSummary* locations =
6829       new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kCallOnSlowPath);
6830   if (load->GetLoadKind() == HLoadMethodType::LoadKind::kRuntimeCall) {
6831       Location location = Location::RegisterLocation(RAX);
6832       CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location);
6833   } else {
6834     DCHECK_EQ(load->GetLoadKind(), HLoadMethodType::LoadKind::kBssEntry);
6835     locations->SetOut(Location::RequiresRegister());
6836     if (codegen_->EmitNonBakerReadBarrier()) {
6837       // For non-Baker read barrier we have a temp-clobbering call.
6838     } else {
6839       // Rely on the pResolveMethodType to save everything.
6840       locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6841     }
6842   }
6843 }
6844 
6845 void InstructionCodeGeneratorX86_64::VisitLoadMethodType(HLoadMethodType* load) {
6846   LocationSummary* locations = load->GetLocations();
6847   Location out_loc = locations->Out();
6848   CpuRegister out = out_loc.AsRegister<CpuRegister>();
6849 
6850   switch (load->GetLoadKind()) {
6851     case HLoadMethodType::LoadKind::kBssEntry: {
6852       Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
6853                                           /* no_rip= */ false);
6854       Label* fixup_label = codegen_->NewMethodTypeBssEntryPatch(load);
6855       // /* GcRoot<mirror::MethodType> */ out = *address  /* PC-relative */
6856       GenerateGcRootFieldLoad(
6857           load, out_loc, address, fixup_label, codegen_->GetCompilerReadBarrierOption());
6858       // No need for memory fence, thanks to the x86-64 memory model.
6859       SlowPathCode* slow_path =
6860           new (codegen_->GetScopedAllocator()) LoadMethodTypeSlowPathX86_64(load);
6861       codegen_->AddSlowPath(slow_path);
6862       __ testl(out, out);
6863       __ j(kEqual, slow_path->GetEntryLabel());
6864       __ Bind(slow_path->GetExitLabel());
6865       return;
6866     }
6867     default:
6868       DCHECK_EQ(load->GetLoadKind(), HLoadMethodType::LoadKind::kRuntimeCall);
6869       codegen_->GenerateLoadMethodTypeRuntimeCall(load);
6870       break;
6871   }
6872 }
6873 
6874 void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) {
6875   // We assume the class is not null.
6876   SlowPathCode* slow_path =
6877       new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64(check->GetLoadClass(), check);
6878   codegen_->AddSlowPath(slow_path);
6879   GenerateClassInitializationCheck(slow_path,
6880                                    check->GetLocations()->InAt(0).AsRegister<CpuRegister>());
6881 }
6882 
6883 HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind(
6884     HLoadString::LoadKind desired_string_load_kind) {
6885   switch (desired_string_load_kind) {
6886     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
6887     case HLoadString::LoadKind::kBootImageRelRo:
6888     case HLoadString::LoadKind::kBssEntry:
6889       DCHECK(!GetCompilerOptions().IsJitCompiler());
6890       break;
6891     case HLoadString::LoadKind::kJitBootImageAddress:
6892     case HLoadString::LoadKind::kJitTableAddress:
6893       DCHECK(GetCompilerOptions().IsJitCompiler());
6894       break;
6895     case HLoadString::LoadKind::kRuntimeCall:
6896       break;
6897   }
6898   return desired_string_load_kind;
6899 }
6900 
6901 void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) {
6902   LocationSummary::CallKind call_kind = codegen_->GetLoadStringCallKind(load);
6903   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
6904   if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) {
6905     locations->SetOut(Location::RegisterLocation(RAX));
6906   } else {
6907     locations->SetOut(Location::RequiresRegister());
6908     if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
6909       if (codegen_->EmitNonBakerReadBarrier()) {
6910         // For non-Baker read barrier we have a temp-clobbering call.
6911       } else {
6912         // Rely on the pResolveString to save everything.
6913         locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6914       }
6915     }
6916   }
6917 }
6918 
6919 Label* CodeGeneratorX86_64::NewJitRootStringPatch(const DexFile& dex_file,
6920                                                   dex::StringIndex string_index,
6921                                                   Handle<mirror::String> handle) {
6922   ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
6923   // Add a patch entry and return the label.
6924   jit_string_patches_.emplace_back(&dex_file, string_index.index_);
6925   PatchInfo<Label>* info = &jit_string_patches_.back();
6926   return &info->label;
6927 }
6928 
6929 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
6930 // move.
6931 void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
6932   LocationSummary* locations = load->GetLocations();
6933   Location out_loc = locations->Out();
6934   CpuRegister out = out_loc.AsRegister<CpuRegister>();
6935 
6936   switch (load->GetLoadKind()) {
6937     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
6938       DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
6939              codegen_->GetCompilerOptions().IsBootImageExtension());
6940       __ leal(out,
6941               Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
6942       codegen_->RecordBootImageStringPatch(load);
6943       return;
6944     }
6945     case HLoadString::LoadKind::kBootImageRelRo: {
6946       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
6947       __ movl(out,
6948               Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
6949       codegen_->RecordBootImageRelRoPatch(CodeGenerator::GetBootImageOffset(load));
6950       return;
6951     }
6952     case HLoadString::LoadKind::kBssEntry: {
6953       Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
6954                                           /* no_rip= */ false);
6955       Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
6956       // /* GcRoot<mirror::Class> */ out = *address  /* PC-relative */
6957       GenerateGcRootFieldLoad(
6958           load, out_loc, address, fixup_label, codegen_->GetCompilerReadBarrierOption());
6959       // No need for memory fence, thanks to the x86-64 memory model.
6960       SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86_64(load);
6961       codegen_->AddSlowPath(slow_path);
6962       __ testl(out, out);
6963       __ j(kEqual, slow_path->GetEntryLabel());
6964       __ Bind(slow_path->GetExitLabel());
6965       return;
6966     }
6967     case HLoadString::LoadKind::kJitBootImageAddress: {
6968       uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get());
6969       DCHECK_NE(address, 0u);
6970       __ movl(out, Immediate(static_cast<int32_t>(address)));  // Zero-extended.
6971       return;
6972     }
6973     case HLoadString::LoadKind::kJitTableAddress: {
6974       Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
6975                                           /* no_rip= */ true);
6976       Label* fixup_label = codegen_->NewJitRootStringPatch(
6977           load->GetDexFile(), load->GetStringIndex(), load->GetString());
6978       // /* GcRoot<mirror::String> */ out = *address
6979       GenerateGcRootFieldLoad(
6980           load, out_loc, address, fixup_label, codegen_->GetCompilerReadBarrierOption());
6981       return;
6982     }
6983     default:
6984       break;
6985   }
6986 
6987   // Custom calling convention: RAX serves as both input and output.
6988   __ movl(CpuRegister(RAX), Immediate(load->GetStringIndex().index_));
6989   codegen_->InvokeRuntime(kQuickResolveString,
6990                           load,
6991                           load->GetDexPc());
6992   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
6993 }
6994 
6995 static Address GetExceptionTlsAddress() {
6996   return Address::Absolute(Thread::ExceptionOffset<kX86_64PointerSize>().Int32Value(),
6997                            /* no_rip= */ true);
6998 }
6999 
7000 void LocationsBuilderX86_64::VisitLoadException(HLoadException* load) {
7001   LocationSummary* locations =
7002       new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall);
7003   locations->SetOut(Location::RequiresRegister());
7004 }
7005 
7006 void InstructionCodeGeneratorX86_64::VisitLoadException(HLoadException* load) {
7007   __ gs()->movl(load->GetLocations()->Out().AsRegister<CpuRegister>(), GetExceptionTlsAddress());
7008 }
7009 
7010 void LocationsBuilderX86_64::VisitClearException(HClearException* clear) {
7011   new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
7012 }
7013 
7014 void InstructionCodeGeneratorX86_64::VisitClearException([[maybe_unused]] HClearException* clear) {
7015   __ gs()->movl(GetExceptionTlsAddress(), Immediate(0));
7016 }
7017 
7018 void LocationsBuilderX86_64::VisitThrow(HThrow* instruction) {
7019   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
7020       instruction, LocationSummary::kCallOnMainOnly);
7021   InvokeRuntimeCallingConvention calling_convention;
7022   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
7023 }
7024 
7025 void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) {
7026   codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
7027   CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
7028 }
7029 
7030 // Temp is used for read barrier.
7031 static size_t NumberOfInstanceOfTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) {
7032   if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
7033     return 1;
7034   }
7035   if (emit_read_barrier &&
7036       !kUseBakerReadBarrier &&
7037       (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
7038        type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
7039        type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
7040     return 1;
7041   }
7042   return 0;
7043 }
7044 
7045 // The interface case has 2 temps: one holds the number of interfaces, the other the current
7046 // interface pointer; the current interface is compared in memory.
7047 // The other checks have one temp for loading the object's class.
7048 static size_t NumberOfCheckCastTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) {
7049   return 1 + NumberOfInstanceOfTemps(emit_read_barrier, type_check_kind);
7050 }
7051 
7052 void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
7053   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
7054   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7055   bool baker_read_barrier_slow_path = false;
7056   switch (type_check_kind) {
7057     case TypeCheckKind::kExactCheck:
7058     case TypeCheckKind::kAbstractClassCheck:
7059     case TypeCheckKind::kClassHierarchyCheck:
7060     case TypeCheckKind::kArrayObjectCheck:
7061     case TypeCheckKind::kInterfaceCheck: {
7062       bool needs_read_barrier = codegen_->InstanceOfNeedsReadBarrier(instruction);
7063       call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
7064       baker_read_barrier_slow_path = (kUseBakerReadBarrier && needs_read_barrier) &&
7065                                      (type_check_kind != TypeCheckKind::kInterfaceCheck);
7066       break;
7067     }
7068     case TypeCheckKind::kArrayCheck:
7069     case TypeCheckKind::kUnresolvedCheck:
7070       call_kind = LocationSummary::kCallOnSlowPath;
7071       break;
7072     case TypeCheckKind::kBitstringCheck:
7073       break;
7074   }
7075 
7076   LocationSummary* locations =
7077       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
7078   if (baker_read_barrier_slow_path) {
7079     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
7080   }
7081   locations->SetInAt(0, Location::RequiresRegister());
7082   if (type_check_kind == TypeCheckKind::kBitstringCheck) {
7083     locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
7084     locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
7085     locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
7086   } else if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
7087     locations->SetInAt(1, Location::RequiresRegister());
7088   } else {
7089     locations->SetInAt(1, Location::Any());
7090   }
7091   // Note that TypeCheckSlowPathX86_64 uses this "out" register too.
7092   locations->SetOut(Location::RequiresRegister());
7093   locations->AddRegisterTemps(
7094       NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind));
7095 }
7096 
7097 void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
7098   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7099   LocationSummary* locations = instruction->GetLocations();
7100   Location obj_loc = locations->InAt(0);
7101   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
7102   Location cls = locations->InAt(1);
7103   Location out_loc =  locations->Out();
7104   CpuRegister out = out_loc.AsRegister<CpuRegister>();
7105   const size_t num_temps = NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind);
7106   DCHECK_LE(num_temps, 1u);
7107   Location maybe_temp_loc = (num_temps >= 1u) ? locations->GetTemp(0) : Location::NoLocation();
7108   const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
7109   const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
7110   const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
7111   const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
7112   const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
7113   const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
7114   const uint32_t object_array_data_offset =
7115       mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
7116   SlowPathCode* slow_path = nullptr;
7117   NearLabel done, zero;
7118 
7119   // Return 0 if `obj` is null.
7120   // Avoid null check if we know obj is not null.
7121   if (instruction->MustDoNullCheck()) {
7122     __ testl(obj, obj);
7123     __ j(kEqual, &zero);
7124   }
7125 
7126   switch (type_check_kind) {
7127     case TypeCheckKind::kExactCheck: {
7128       ReadBarrierOption read_barrier_option =
7129           codegen_->ReadBarrierOptionForInstanceOf(instruction);
7130       // /* HeapReference<Class> */ out = obj->klass_
7131       GenerateReferenceLoadTwoRegisters(instruction,
7132                                         out_loc,
7133                                         obj_loc,
7134                                         class_offset,
7135                                         read_barrier_option);
7136       if (cls.IsRegister()) {
7137         __ cmpl(out, cls.AsRegister<CpuRegister>());
7138       } else {
7139         DCHECK(cls.IsStackSlot()) << cls;
7140         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
7141       }
7142       if (zero.IsLinked()) {
7143         // Classes must be equal for the instanceof to succeed.
7144         __ j(kNotEqual, &zero);
7145         __ movl(out, Immediate(1));
7146         __ jmp(&done);
7147       } else {
7148         __ setcc(kEqual, out);
7149         // setcc only sets the low byte.
7150         __ andl(out, Immediate(1));
7151       }
7152       break;
7153     }
7154 
7155     case TypeCheckKind::kAbstractClassCheck: {
7156       ReadBarrierOption read_barrier_option =
7157           codegen_->ReadBarrierOptionForInstanceOf(instruction);
7158       // /* HeapReference<Class> */ out = obj->klass_
7159       GenerateReferenceLoadTwoRegisters(instruction,
7160                                         out_loc,
7161                                         obj_loc,
7162                                         class_offset,
7163                                         read_barrier_option);
7164       // If the class is abstract, we eagerly fetch the super class of the
7165       // object to avoid doing a comparison we know will fail.
7166       NearLabel loop, success;
7167       __ Bind(&loop);
7168       // /* HeapReference<Class> */ out = out->super_class_
7169       GenerateReferenceLoadOneRegister(instruction,
7170                                        out_loc,
7171                                        super_offset,
7172                                        maybe_temp_loc,
7173                                        read_barrier_option);
7174       __ testl(out, out);
7175       // If `out` is null, we use it for the result, and jump to `done`.
7176       __ j(kEqual, &done);
7177       if (cls.IsRegister()) {
7178         __ cmpl(out, cls.AsRegister<CpuRegister>());
7179       } else {
7180         DCHECK(cls.IsStackSlot()) << cls;
7181         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
7182       }
7183       __ j(kNotEqual, &loop);
7184       __ movl(out, Immediate(1));
7185       if (zero.IsLinked()) {
7186         __ jmp(&done);
7187       }
7188       break;
7189     }
7190 
7191     case TypeCheckKind::kClassHierarchyCheck: {
7192       ReadBarrierOption read_barrier_option =
7193           codegen_->ReadBarrierOptionForInstanceOf(instruction);
7194       // /* HeapReference<Class> */ out = obj->klass_
7195       GenerateReferenceLoadTwoRegisters(instruction,
7196                                         out_loc,
7197                                         obj_loc,
7198                                         class_offset,
7199                                         read_barrier_option);
7200       // Walk over the class hierarchy to find a match.
7201       NearLabel loop, success;
7202       __ Bind(&loop);
7203       if (cls.IsRegister()) {
7204         __ cmpl(out, cls.AsRegister<CpuRegister>());
7205       } else {
7206         DCHECK(cls.IsStackSlot()) << cls;
7207         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
7208       }
7209       __ j(kEqual, &success);
7210       // /* HeapReference<Class> */ out = out->super_class_
7211       GenerateReferenceLoadOneRegister(instruction,
7212                                        out_loc,
7213                                        super_offset,
7214                                        maybe_temp_loc,
7215                                        read_barrier_option);
7216       __ testl(out, out);
7217       __ j(kNotEqual, &loop);
7218       // If `out` is null, we use it for the result, and jump to `done`.
7219       __ jmp(&done);
7220       __ Bind(&success);
7221       __ movl(out, Immediate(1));
7222       if (zero.IsLinked()) {
7223         __ jmp(&done);
7224       }
7225       break;
7226     }
7227 
7228     case TypeCheckKind::kArrayObjectCheck: {
7229       ReadBarrierOption read_barrier_option =
7230           codegen_->ReadBarrierOptionForInstanceOf(instruction);
7231       // /* HeapReference<Class> */ out = obj->klass_
7232       GenerateReferenceLoadTwoRegisters(instruction,
7233                                         out_loc,
7234                                         obj_loc,
7235                                         class_offset,
7236                                         read_barrier_option);
7237       // Do an exact check.
7238       NearLabel exact_check;
7239       if (cls.IsRegister()) {
7240         __ cmpl(out, cls.AsRegister<CpuRegister>());
7241       } else {
7242         DCHECK(cls.IsStackSlot()) << cls;
7243         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
7244       }
7245       __ j(kEqual, &exact_check);
7246       // Otherwise, we need to check that the object's class is a non-primitive array.
7247       // /* HeapReference<Class> */ out = out->component_type_
7248       GenerateReferenceLoadOneRegister(instruction,
7249                                        out_loc,
7250                                        component_offset,
7251                                        maybe_temp_loc,
7252                                        read_barrier_option);
7253       __ testl(out, out);
7254       // If `out` is null, we use it for the result, and jump to `done`.
7255       __ j(kEqual, &done);
7256       __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot));
7257       __ j(kNotEqual, &zero);
7258       __ Bind(&exact_check);
7259       __ movl(out, Immediate(1));
7260       __ jmp(&done);
7261       break;
7262     }
7263 
7264     case TypeCheckKind::kArrayCheck: {
7265       // No read barrier since the slow path will retry upon failure.
7266       // /* HeapReference<Class> */ out = obj->klass_
7267       GenerateReferenceLoadTwoRegisters(instruction,
7268                                         out_loc,
7269                                         obj_loc,
7270                                         class_offset,
7271                                         kWithoutReadBarrier);
7272       if (cls.IsRegister()) {
7273         __ cmpl(out, cls.AsRegister<CpuRegister>());
7274       } else {
7275         DCHECK(cls.IsStackSlot()) << cls;
7276         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
7277       }
7278       DCHECK(locations->OnlyCallsOnSlowPath());
7279       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
7280           instruction, /* is_fatal= */ false);
7281       codegen_->AddSlowPath(slow_path);
7282       __ j(kNotEqual, slow_path->GetEntryLabel());
7283       __ movl(out, Immediate(1));
7284       if (zero.IsLinked()) {
7285         __ jmp(&done);
7286       }
7287       break;
7288     }
7289 
7290     case TypeCheckKind::kInterfaceCheck: {
7291       if (codegen_->InstanceOfNeedsReadBarrier(instruction)) {
7292         DCHECK(locations->OnlyCallsOnSlowPath());
7293         slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
7294             instruction, /* is_fatal= */ false);
7295         codegen_->AddSlowPath(slow_path);
7296         if (codegen_->EmitNonBakerReadBarrier()) {
7297           __ jmp(slow_path->GetEntryLabel());
7298           break;
7299         }
7300         // For Baker read barrier, take the slow path while marking.
7301         __ gs()->cmpl(
7302             Address::Absolute(Thread::IsGcMarkingOffset<kX86_64PointerSize>(), /* no_rip= */ true),
7303             Immediate(0));
7304         __ j(kNotEqual, slow_path->GetEntryLabel());
7305       }
7306 
7307       // Fast-path without read barriers.
7308       CpuRegister temp = maybe_temp_loc.AsRegister<CpuRegister>();
7309       // /* HeapReference<Class> */ temp = obj->klass_
7310       __ movl(temp, Address(obj, class_offset));
7311       __ MaybeUnpoisonHeapReference(temp);
7312       // /* HeapReference<Class> */ temp = temp->iftable_
7313       __ movl(temp, Address(temp, iftable_offset));
7314       __ MaybeUnpoisonHeapReference(temp);
7315       // Load the size of the `IfTable`. The `Class::iftable_` is never null.
7316       __ movl(out, Address(temp, array_length_offset));
7317       // Maybe poison the `cls` for direct comparison with memory.
7318       __ MaybePoisonHeapReference(cls.AsRegister<CpuRegister>());
7319       // Loop through the iftable and check if any class matches.
7320       NearLabel loop, end;
7321       __ Bind(&loop);
7322       // Check if we still have an entry to compare.
7323       __ subl(out, Immediate(2));
7324       __ j(kNegative, (zero.IsLinked() && !kPoisonHeapReferences) ? &zero : &end);
7325       // Go to next interface if the classes do not match.
7326       __ cmpl(cls.AsRegister<CpuRegister>(),
7327               CodeGeneratorX86_64::ArrayAddress(temp, out_loc, TIMES_4, object_array_data_offset));
7328       __ j(kNotEqual, &loop);
7329       if (zero.IsLinked()) {
7330         __ movl(out, Immediate(1));
7331         // If `cls` was poisoned above, unpoison it.
7332         __ MaybeUnpoisonHeapReference(cls.AsRegister<CpuRegister>());
7333         __ jmp(&done);
7334         if (kPoisonHeapReferences) {
7335           // The false case needs to unpoison the class before jumping to `zero`.
7336           __ Bind(&end);
7337           __ UnpoisonHeapReference(cls.AsRegister<CpuRegister>());
7338           __ jmp(&zero);
7339         }
7340       } else {
7341         // To reduce branching, use the fact that the false case branches with a `-2` in `out`.
7342         __ movl(out, Immediate(-1));
7343         __ Bind(&end);
7344         __ addl(out, Immediate(2));
7345         // If `cls` was poisoned above, unpoison it.
7346         __ MaybeUnpoisonHeapReference(cls.AsRegister<CpuRegister>());
7347       }
7348       break;
7349     }
7350 
7351     case TypeCheckKind::kUnresolvedCheck: {
7352       // Note that we indeed only call on the slow path, but we always go
7353       // into the slow path for the unresolved check case.
7354       //
7355       // We cannot directly call the InstanceofNonTrivial runtime
7356       // entry point without resorting to a type checking slow path
7357       // here (i.e. by calling InvokeRuntime directly), as it would
7358       // require assigning fixed registers for the inputs of this
7359       // HInstanceOf instruction (following the runtime calling
7360       // convention), which might be cluttered by the potential first
7361       // read barrier emission at the beginning of this method.
7362       //
7363       // TODO: Introduce a new runtime entry point taking the object
7364       // to test (instead of its class) as argument, and let it deal
7365       // with the read barrier issues. This will let us refactor this
7366       // case of the `switch` code as it was previously (with a direct
7367       // call to the runtime not using a type checking slow path).
7368       // This should also be beneficial for the other cases above.
7369       DCHECK(locations->OnlyCallsOnSlowPath());
7370       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
7371           instruction, /* is_fatal= */ false);
7372       codegen_->AddSlowPath(slow_path);
7373       __ jmp(slow_path->GetEntryLabel());
7374       break;
7375     }
7376 
7377     case TypeCheckKind::kBitstringCheck: {
7378       // /* HeapReference<Class> */ temp = obj->klass_
7379       GenerateReferenceLoadTwoRegisters(instruction,
7380                                         out_loc,
7381                                         obj_loc,
7382                                         class_offset,
7383                                         kWithoutReadBarrier);
7384 
7385       GenerateBitstringTypeCheckCompare(instruction, out);
7386       if (zero.IsLinked()) {
7387         __ j(kNotEqual, &zero);
7388         __ movl(out, Immediate(1));
7389         __ jmp(&done);
7390       } else {
7391         __ setcc(kEqual, out);
7392         // setcc only sets the low byte.
7393         __ andl(out, Immediate(1));
7394       }
7395       break;
7396     }
7397   }
7398 
7399   if (zero.IsLinked()) {
7400     __ Bind(&zero);
7401     __ xorl(out, out);
7402   }
7403 
7404   if (done.IsLinked()) {
7405     __ Bind(&done);
7406   }
7407 
7408   if (slow_path != nullptr) {
7409     __ Bind(slow_path->GetExitLabel());
7410   }
7411 }
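
// Illustrative sketch (not generated code): the kAbstractClassCheck and
// kClassHierarchyCheck paths above lower `instanceof` to a walk up the
// superclass chain, conceptually equivalent to the loop below. The type and
// field names here are placeholders, not the real mirror::Class layout.
struct ClassSketch {
  const ClassSketch* super_class;  // Null at the root of the hierarchy.
};
[[maybe_unused]] static bool IsSubclassOfSketch(const ClassSketch* klass,
                                                const ClassSketch* target) {
  for (const ClassSketch* k = klass; k != nullptr; k = k->super_class) {
    if (k == target) {
      return true;  // Exact match or a superclass matched.
    }
  }
  return false;  // Reached a null superclass link without a match.
}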
7412 
VisitCheckCast(HCheckCast * instruction)7413 void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) {
7414   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7415   LocationSummary::CallKind call_kind = codegen_->GetCheckCastCallKind(instruction);
7416   LocationSummary* locations =
7417       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
7418   locations->SetInAt(0, Location::RequiresRegister());
7419   if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
7420     // Require a register for the interface check since there is a loop that compares the class to
7421     // a memory address.
7422     locations->SetInAt(1, Location::RequiresRegister());
7423   } else if (type_check_kind == TypeCheckKind::kBitstringCheck) {
7424     locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
7425     locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
7426     locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
7427   } else {
7428     locations->SetInAt(1, Location::Any());
7429   }
7430   locations->AddRegisterTemps(NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind));
7431 }
7432 
VisitCheckCast(HCheckCast * instruction)7433 void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
7434   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7435   LocationSummary* locations = instruction->GetLocations();
7436   Location obj_loc = locations->InAt(0);
7437   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
7438   Location cls = locations->InAt(1);
7439   Location temp_loc = locations->GetTemp(0);
7440   CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
7441   const size_t num_temps = NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind);
7442   DCHECK_GE(num_temps, 1u);
7443   DCHECK_LE(num_temps, 2u);
7444   Location maybe_temp2_loc = (num_temps >= 2u) ? locations->GetTemp(1) : Location::NoLocation();
7445   const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
7446   const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
7447   const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
7448   const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
7449   const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
7450   const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
7451   const uint32_t object_array_data_offset =
7452       mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
7453 
7454   bool is_type_check_slow_path_fatal = codegen_->IsTypeCheckSlowPathFatal(instruction);
7455   SlowPathCode* type_check_slow_path =
7456       new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
7457           instruction, is_type_check_slow_path_fatal);
7458   codegen_->AddSlowPath(type_check_slow_path);
7459 
7460 
7461   NearLabel done;
7462   // Avoid null check if we know obj is not null.
7463   if (instruction->MustDoNullCheck()) {
7464     __ testl(obj, obj);
7465     __ j(kEqual, &done);
7466   }
7467 
7468   switch (type_check_kind) {
7469     case TypeCheckKind::kExactCheck:
7470     case TypeCheckKind::kArrayCheck: {
7471       // /* HeapReference<Class> */ temp = obj->klass_
7472       GenerateReferenceLoadTwoRegisters(instruction,
7473                                         temp_loc,
7474                                         obj_loc,
7475                                         class_offset,
7476                                         kWithoutReadBarrier);
7477       if (cls.IsRegister()) {
7478         __ cmpl(temp, cls.AsRegister<CpuRegister>());
7479       } else {
7480         DCHECK(cls.IsStackSlot()) << cls;
7481         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
7482       }
7483       // Jump to slow path for throwing the exception or doing a
7484       // more involved array check.
7485       __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
7486       break;
7487     }
7488 
7489     case TypeCheckKind::kAbstractClassCheck: {
7490       // /* HeapReference<Class> */ temp = obj->klass_
7491       GenerateReferenceLoadTwoRegisters(instruction,
7492                                         temp_loc,
7493                                         obj_loc,
7494                                         class_offset,
7495                                         kWithoutReadBarrier);
7496       // If the class is abstract, we eagerly fetch the super class of the
7497       // object to avoid doing a comparison we know will fail.
7498       NearLabel loop;
7499       __ Bind(&loop);
7500       // /* HeapReference<Class> */ temp = temp->super_class_
7501       GenerateReferenceLoadOneRegister(instruction,
7502                                        temp_loc,
7503                                        super_offset,
7504                                        maybe_temp2_loc,
7505                                        kWithoutReadBarrier);
7506 
7507       // If the class reference currently in `temp` is null, jump to the slow path to throw the
7508       // exception.
7509       __ testl(temp, temp);
7510       // Otherwise, compare the classes.
7511       __ j(kZero, type_check_slow_path->GetEntryLabel());
7512       if (cls.IsRegister()) {
7513         __ cmpl(temp, cls.AsRegister<CpuRegister>());
7514       } else {
7515         DCHECK(cls.IsStackSlot()) << cls;
7516         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
7517       }
7518       __ j(kNotEqual, &loop);
7519       break;
7520     }
7521 
7522     case TypeCheckKind::kClassHierarchyCheck: {
7523       // /* HeapReference<Class> */ temp = obj->klass_
7524       GenerateReferenceLoadTwoRegisters(instruction,
7525                                         temp_loc,
7526                                         obj_loc,
7527                                         class_offset,
7528                                         kWithoutReadBarrier);
7529       // Walk over the class hierarchy to find a match.
7530       NearLabel loop;
7531       __ Bind(&loop);
7532       if (cls.IsRegister()) {
7533         __ cmpl(temp, cls.AsRegister<CpuRegister>());
7534       } else {
7535         DCHECK(cls.IsStackSlot()) << cls;
7536         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
7537       }
7538       __ j(kEqual, &done);
7539 
7540       // /* HeapReference<Class> */ temp = temp->super_class_
7541       GenerateReferenceLoadOneRegister(instruction,
7542                                        temp_loc,
7543                                        super_offset,
7544                                        maybe_temp2_loc,
7545                                        kWithoutReadBarrier);
7546 
7547       // If the class reference currently in `temp` is not null, jump
7548       // back to the beginning of the loop.
7549       __ testl(temp, temp);
7550       __ j(kNotZero, &loop);
7551       // Otherwise, jump to the slow path to throw the exception.
7552       __ jmp(type_check_slow_path->GetEntryLabel());
7553       break;
7554     }
7555 
7556     case TypeCheckKind::kArrayObjectCheck: {
7557       // /* HeapReference<Class> */ temp = obj->klass_
7558       GenerateReferenceLoadTwoRegisters(instruction,
7559                                         temp_loc,
7560                                         obj_loc,
7561                                         class_offset,
7562                                         kWithoutReadBarrier);
7563       // Do an exact check.
7565       if (cls.IsRegister()) {
7566         __ cmpl(temp, cls.AsRegister<CpuRegister>());
7567       } else {
7568         DCHECK(cls.IsStackSlot()) << cls;
7569         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
7570       }
7571       __ j(kEqual, &done);
7572 
7573       // Otherwise, we need to check that the object's class is a non-primitive array.
7574       // /* HeapReference<Class> */ temp = temp->component_type_
7575       GenerateReferenceLoadOneRegister(instruction,
7576                                        temp_loc,
7577                                        component_offset,
7578                                        maybe_temp2_loc,
7579                                        kWithoutReadBarrier);
7580 
7581       // If the component type is null (i.e. the object is not an array),
7582       // jump to the slow path to throw the exception. Otherwise, fall
7583       // through to check that this component type is not a primitive
7584       // type.
7585       __ testl(temp, temp);
7587       __ j(kZero, type_check_slow_path->GetEntryLabel());
7588       __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
7589       __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
7590       break;
7591     }
7592 
7593     case TypeCheckKind::kUnresolvedCheck: {
7594       // We always go into the type check slow path for the unresolved case.
7595       //
7596       // We cannot directly call the CheckCast runtime entry point
7597       // without resorting to a type checking slow path here (i.e. by
7598       // calling InvokeRuntime directly), as it would require assigning
7599       // fixed registers for the inputs of this HCheckCast instruction
7600       // (following the runtime calling convention), which
7601       // might be cluttered by the potential first read barrier
7602       // emission at the beginning of this method.
7603       __ jmp(type_check_slow_path->GetEntryLabel());
7604       break;
7605     }
7606 
7607     case TypeCheckKind::kInterfaceCheck: {
7608       // Fast path for the interface check. Try to avoid read barriers to improve the fast path.
7609       // We cannot get false positives by doing this.
7610       // /* HeapReference<Class> */ temp = obj->klass_
7611       GenerateReferenceLoadTwoRegisters(instruction,
7612                                         temp_loc,
7613                                         obj_loc,
7614                                         class_offset,
7615                                         kWithoutReadBarrier);
7616 
7617       // /* HeapReference<Class> */ temp = temp->iftable_
7618       GenerateReferenceLoadOneRegister(instruction,
7619                                        temp_loc,
7620                                        iftable_offset,
7621                                        maybe_temp2_loc,
7622                                        kWithoutReadBarrier);
7623       // Load the size of the `IfTable`. The `Class::iftable_` is never null.
7624       __ movl(maybe_temp2_loc.AsRegister<CpuRegister>(), Address(temp, array_length_offset));
7625       // Maybe poison the `cls` for direct comparison with memory.
7626       __ MaybePoisonHeapReference(cls.AsRegister<CpuRegister>());
7627       // Loop through the iftable and check if any class matches.
7628       NearLabel start_loop;
7629       __ Bind(&start_loop);
7630       // Check if we still have an entry to compare.
7631       __ subl(maybe_temp2_loc.AsRegister<CpuRegister>(), Immediate(2));
7632       __ j(kNegative, type_check_slow_path->GetEntryLabel());
7633       // Go to next interface if the classes do not match.
7634       __ cmpl(cls.AsRegister<CpuRegister>(),
7635               CodeGeneratorX86_64::ArrayAddress(temp,
7636                                                 maybe_temp2_loc,
7637                                                 TIMES_4,
7638                                                 object_array_data_offset));
7639       __ j(kNotEqual, &start_loop);  // Fall through (success) if the classes match.
7640       // If `cls` was poisoned above, unpoison it.
7641       __ MaybeUnpoisonHeapReference(cls.AsRegister<CpuRegister>());
7642       break;
7643     }
7644 
7645     case TypeCheckKind::kBitstringCheck: {
7646       // /* HeapReference<Class> */ temp = obj->klass_
7647       GenerateReferenceLoadTwoRegisters(instruction,
7648                                         temp_loc,
7649                                         obj_loc,
7650                                         class_offset,
7651                                         kWithoutReadBarrier);
7652 
7653       GenerateBitstringTypeCheckCompare(instruction, temp);
7654       __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
7655       break;
7656     }
7657   }
7658 
7659   if (done.IsLinked()) {
7660     __ Bind(&done);
7661   }
7662 
7663   __ Bind(type_check_slow_path->GetExitLabel());
7664 }
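
// Illustrative sketch (not generated code) of the interface-check loop emitted
// above: the IfTable is scanned backwards two slots at a time, since each
// declared interface occupies a pair of entries (the interface class and its
// associated method array). The parameter names below are placeholders.
[[maybe_unused]] static bool ImplementsInterfaceSketch(const void* const* iftable_entries,
                                                       int32_t iftable_length,
                                                       const void* interface_class) {
  // `iftable_length` counts slots, so it is twice the number of interfaces.
  for (int32_t i = iftable_length - 2; i >= 0; i -= 2) {
    if (iftable_entries[i] == interface_class) {
      return true;  // Found the interface class in an even slot.
    }
  }
  return false;  // Exhausted the table without a match.
}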
7665 
VisitMonitorOperation(HMonitorOperation * instruction)7666 void LocationsBuilderX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
7667   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
7668       instruction, LocationSummary::kCallOnMainOnly);
7669   InvokeRuntimeCallingConvention calling_convention;
7670   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
7671 }
7672 
VisitMonitorOperation(HMonitorOperation * instruction)7673 void InstructionCodeGeneratorX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
7674   codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
7675                           instruction,
7676                           instruction->GetDexPc());
7677   if (instruction->IsEnter()) {
7678     CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
7679   } else {
7680     CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
7681   }
7682 }
7683 
VisitX86AndNot(HX86AndNot * instruction)7684 void LocationsBuilderX86_64::VisitX86AndNot(HX86AndNot* instruction) {
7685   DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
7686   DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType();
7687   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
7688   locations->SetInAt(0, Location::RequiresRegister());
7689   // There is no immediate variant of the negated bitwise AND (`andn`) on x86.
7690   locations->SetInAt(1, Location::RequiresRegister());
7691   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
7692 }
7693 
VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit * instruction)7694 void LocationsBuilderX86_64::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) {
7695   DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
7696   DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType();
7697   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
7698   locations->SetInAt(0, Location::RequiresRegister());
7699   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
7700 }
7701 
VisitX86AndNot(HX86AndNot * instruction)7702 void InstructionCodeGeneratorX86_64::VisitX86AndNot(HX86AndNot* instruction) {
7703   LocationSummary* locations = instruction->GetLocations();
7704   Location first = locations->InAt(0);
7705   Location second = locations->InAt(1);
7706   Location dest = locations->Out();
7707   __ andn(dest.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
7708 }
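
// Minimal sketch of what the BMI `andn` emitted above computes: the x86 ANDN
// instruction produces (NOT src1) AND src2, which is also why no immediate
// form exists and both inputs must live in registers.
[[maybe_unused]] static uint64_t AndNotSketch(uint64_t first, uint64_t second) {
  return ~first & second;  // dest = ~src1 & src2
}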
7709 
VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit * instruction)7710 void InstructionCodeGeneratorX86_64::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) {
7711   LocationSummary* locations = instruction->GetLocations();
7712   Location src = locations->InAt(0);
7713   Location dest = locations->Out();
7714   switch (instruction->GetOpKind()) {
7715     case HInstruction::kAnd:
7716       __ blsr(dest.AsRegister<CpuRegister>(), src.AsRegister<CpuRegister>());
7717       break;
7718     case HInstruction::kXor:
7719       __ blsmsk(dest.AsRegister<CpuRegister>(), src.AsRegister<CpuRegister>());
7720       break;
7721     default:
7722       LOG(FATAL) << "Unreachable";
7723   }
7724 }
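
// Minimal sketch of the BMI1 instructions selected above, in plain unsigned
// arithmetic: BLSR clears the lowest set bit, BLSMSK produces a mask covering
// all bits up to and including the lowest set bit.
[[maybe_unused]] static uint64_t BlsrSketch(uint64_t src) {
  return src & (src - 1u);  // kAnd case: reset least significant set bit.
}
[[maybe_unused]] static uint64_t BlsmskSketch(uint64_t src) {
  return src ^ (src - 1u);  // kXor case: mask up to least significant set bit.
}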
7725 
VisitAnd(HAnd * instruction)7726 void LocationsBuilderX86_64::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
VisitOr(HOr * instruction)7727 void LocationsBuilderX86_64::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); }
VisitXor(HXor * instruction)7728 void LocationsBuilderX86_64::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); }
7729 
HandleBitwiseOperation(HBinaryOperation * instruction)7730 void LocationsBuilderX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
7731   LocationSummary* locations =
7732       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
7733   DCHECK(instruction->GetResultType() == DataType::Type::kInt32
7734          || instruction->GetResultType() == DataType::Type::kInt64);
7735   locations->SetInAt(0, Location::RequiresRegister());
7736   locations->SetInAt(1, Location::Any());
7737   locations->SetOut(Location::SameAsFirstInput());
7738 }
7739 
VisitAnd(HAnd * instruction)7740 void InstructionCodeGeneratorX86_64::VisitAnd(HAnd* instruction) {
7741   HandleBitwiseOperation(instruction);
7742 }
7743 
VisitOr(HOr * instruction)7744 void InstructionCodeGeneratorX86_64::VisitOr(HOr* instruction) {
7745   HandleBitwiseOperation(instruction);
7746 }
7747 
VisitXor(HXor * instruction)7748 void InstructionCodeGeneratorX86_64::VisitXor(HXor* instruction) {
7749   HandleBitwiseOperation(instruction);
7750 }
7751 
HandleBitwiseOperation(HBinaryOperation * instruction)7752 void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
7753   LocationSummary* locations = instruction->GetLocations();
7754   Location first = locations->InAt(0);
7755   Location second = locations->InAt(1);
7756   DCHECK(first.Equals(locations->Out()));
7757 
7758   if (instruction->GetResultType() == DataType::Type::kInt32) {
7759     if (second.IsRegister()) {
7760       if (instruction->IsAnd()) {
7761         __ andl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
7762       } else if (instruction->IsOr()) {
7763         __ orl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
7764       } else {
7765         DCHECK(instruction->IsXor());
7766         __ xorl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
7767       }
7768     } else if (second.IsConstant()) {
7769       Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
7770       if (instruction->IsAnd()) {
7771         __ andl(first.AsRegister<CpuRegister>(), imm);
7772       } else if (instruction->IsOr()) {
7773         __ orl(first.AsRegister<CpuRegister>(), imm);
7774       } else {
7775         DCHECK(instruction->IsXor());
7776         __ xorl(first.AsRegister<CpuRegister>(), imm);
7777       }
7778     } else {
7779       Address address(CpuRegister(RSP), second.GetStackIndex());
7780       if (instruction->IsAnd()) {
7781         __ andl(first.AsRegister<CpuRegister>(), address);
7782       } else if (instruction->IsOr()) {
7783         __ orl(first.AsRegister<CpuRegister>(), address);
7784       } else {
7785         DCHECK(instruction->IsXor());
7786         __ xorl(first.AsRegister<CpuRegister>(), address);
7787       }
7788     }
7789   } else {
7790     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
7791     CpuRegister first_reg = first.AsRegister<CpuRegister>();
7792     bool second_is_constant = false;
7793     int64_t value = 0;
7794     if (second.IsConstant()) {
7795       second_is_constant = true;
7796       value = second.GetConstant()->AsLongConstant()->GetValue();
7797     }
7798     bool is_int32_value = IsInt<32>(value);
7799 
7800     if (instruction->IsAnd()) {
7801       if (second_is_constant) {
7802         if (is_int32_value) {
7803           __ andq(first_reg, Immediate(static_cast<int32_t>(value)));
7804         } else {
7805           __ andq(first_reg, codegen_->LiteralInt64Address(value));
7806         }
7807       } else if (second.IsDoubleStackSlot()) {
7808         __ andq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
7809       } else {
7810         __ andq(first_reg, second.AsRegister<CpuRegister>());
7811       }
7812     } else if (instruction->IsOr()) {
7813       if (second_is_constant) {
7814         if (is_int32_value) {
7815           __ orq(first_reg, Immediate(static_cast<int32_t>(value)));
7816         } else {
7817           __ orq(first_reg, codegen_->LiteralInt64Address(value));
7818         }
7819       } else if (second.IsDoubleStackSlot()) {
7820         __ orq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
7821       } else {
7822         __ orq(first_reg, second.AsRegister<CpuRegister>());
7823       }
7824     } else {
7825       DCHECK(instruction->IsXor());
7826       if (second_is_constant) {
7827         if (is_int32_value) {
7828           __ xorq(first_reg, Immediate(static_cast<int32_t>(value)));
7829         } else {
7830           __ xorq(first_reg, codegen_->LiteralInt64Address(value));
7831         }
7832       } else if (second.IsDoubleStackSlot()) {
7833         __ xorq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
7834       } else {
7835         __ xorq(first_reg, second.AsRegister<CpuRegister>());
7836       }
7837     }
7838   }
7839 }
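
// Illustrative sketch of the operand selection above for the 64-bit case:
// x86-64 logic instructions only accept sign-extended 32-bit immediates, so a
// wider constant has to be read from the constant area (via LiteralInt64Address)
// instead. This predicate mirrors the IsInt<32>(value) test used above.
[[maybe_unused]] static bool FitsInSignExtended32Sketch(int64_t value) {
  return value == static_cast<int64_t>(static_cast<int32_t>(value));
}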
7840 
GenerateReferenceLoadOneRegister(HInstruction * instruction,Location out,uint32_t offset,Location maybe_temp,ReadBarrierOption read_barrier_option)7841 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(
7842     HInstruction* instruction,
7843     Location out,
7844     uint32_t offset,
7845     Location maybe_temp,
7846     ReadBarrierOption read_barrier_option) {
7847   CpuRegister out_reg = out.AsRegister<CpuRegister>();
7848   if (read_barrier_option == kWithReadBarrier) {
7849     DCHECK(codegen_->EmitReadBarrier());
7850     if (kUseBakerReadBarrier) {
7851       // Load with fast path based Baker's read barrier.
7852       // /* HeapReference<Object> */ out = *(out + offset)
7853       codegen_->GenerateFieldLoadWithBakerReadBarrier(
7854           instruction, out, out_reg, offset, /* needs_null_check= */ false);
7855     } else {
7856       // Load with slow path based read barrier.
7857       // Save the value of `out` into `maybe_temp` before overwriting it
7858       // in the following move operation, as we will need it for the
7859       // read barrier below.
7860       DCHECK(maybe_temp.IsRegister()) << maybe_temp;
7861       __ movl(maybe_temp.AsRegister<CpuRegister>(), out_reg);
7862       // /* HeapReference<Object> */ out = *(out + offset)
7863       __ movl(out_reg, Address(out_reg, offset));
7864       codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
7865     }
7866   } else {
7867     // Plain load with no read barrier.
7868     // /* HeapReference<Object> */ out = *(out + offset)
7869     __ movl(out_reg, Address(out_reg, offset));
7870     __ MaybeUnpoisonHeapReference(out_reg);
7871   }
7872 }
7873 
GenerateReferenceLoadTwoRegisters(HInstruction * instruction,Location out,Location obj,uint32_t offset,ReadBarrierOption read_barrier_option)7874 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(
7875     HInstruction* instruction,
7876     Location out,
7877     Location obj,
7878     uint32_t offset,
7879     ReadBarrierOption read_barrier_option) {
7880   CpuRegister out_reg = out.AsRegister<CpuRegister>();
7881   CpuRegister obj_reg = obj.AsRegister<CpuRegister>();
7882   if (read_barrier_option == kWithReadBarrier) {
7883     DCHECK(codegen_->EmitReadBarrier());
7884     if (kUseBakerReadBarrier) {
7885       // Load with fast path based Baker's read barrier.
7886       // /* HeapReference<Object> */ out = *(obj + offset)
7887       codegen_->GenerateFieldLoadWithBakerReadBarrier(
7888           instruction, out, obj_reg, offset, /* needs_null_check= */ false);
7889     } else {
7890       // Load with slow path based read barrier.
7891       // /* HeapReference<Object> */ out = *(obj + offset)
7892       __ movl(out_reg, Address(obj_reg, offset));
7893       codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
7894     }
7895   } else {
7896     // Plain load with no read barrier.
7897     // /* HeapReference<Object> */ out = *(obj + offset)
7898     __ movl(out_reg, Address(obj_reg, offset));
7899     __ MaybeUnpoisonHeapReference(out_reg);
7900   }
7901 }
7902 
GenerateGcRootFieldLoad(HInstruction * instruction,Location root,const Address & address,Label * fixup_label,ReadBarrierOption read_barrier_option)7903 void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(
7904     HInstruction* instruction,
7905     Location root,
7906     const Address& address,
7907     Label* fixup_label,
7908     ReadBarrierOption read_barrier_option) {
7909   CpuRegister root_reg = root.AsRegister<CpuRegister>();
7910   if (read_barrier_option == kWithReadBarrier) {
7911     DCHECK(codegen_->EmitReadBarrier());
7912     if (kUseBakerReadBarrier) {
7913       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
7914       // Baker's read barriers are used:
7915       //
7916       //   root = obj.field;
7917       //   temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
7918       //   if (temp != null) {
7919       //     root = temp(root)
7920       //   }
7921 
7922       // /* GcRoot<mirror::Object> */ root = *address
7923       __ movl(root_reg, address);
7924       if (fixup_label != nullptr) {
7925         __ Bind(fixup_label);
7926       }
7927       static_assert(
7928           sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
7929           "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
7930           "have different sizes.");
7931       static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
7932                     "art::mirror::CompressedReference<mirror::Object> and int32_t "
7933                     "have different sizes.");
7934 
7935       // Slow path marking the GC root `root`.
7936       SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathX86_64(
7937           instruction, root, /* unpoison_ref_before_marking= */ false);
7938       codegen_->AddSlowPath(slow_path);
7939 
7940       // Test the `Thread::Current()->pReadBarrierMarkReg ## root.reg()` entrypoint.
7941       const int32_t entry_point_offset =
7942           Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(root.reg());
7943       __ gs()->cmpl(Address::Absolute(entry_point_offset, /* no_rip= */ true), Immediate(0));
7944       // The entrypoint is null when the GC is not marking.
7945       __ j(kNotEqual, slow_path->GetEntryLabel());
7946       __ Bind(slow_path->GetExitLabel());
7947     } else {
7948       // GC root loaded through a slow path for read barriers other
7949       // than Baker's.
7950       // /* GcRoot<mirror::Object>* */ root = address
7951       __ leaq(root_reg, address);
7952       if (fixup_label != nullptr) {
7953         __ Bind(fixup_label);
7954       }
7955       // /* mirror::Object* */ root = root->Read()
7956       codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
7957     }
7958   } else {
7959     // Plain GC root load with no read barrier.
7960     // /* GcRoot<mirror::Object> */ root = *address
7961     __ movl(root_reg, address);
7962     if (fixup_label != nullptr) {
7963       __ Bind(fixup_label);
7964     }
7965     // Note that GC roots are not affected by heap poisoning, thus we
7966     // do not have to unpoison `root_reg` here.
7967   }
7968 }
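
// Illustrative sketch of the Baker GC-root fast path above: the per-register
// mark entrypoint slot in the Thread doubles as the "is the GC marking?" flag
// (it is null when the GC is not marking) and as the function the slow path
// invokes. The function pointer type here is a placeholder.
using MarkRootSketchFn = void* (*)(void*);
[[maybe_unused]] static void* ReadBarrierRootSketch(void* root, MarkRootSketchFn mark_entrypoint) {
  return (mark_entrypoint != nullptr) ? mark_entrypoint(root) : root;
}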
7969 
GenerateFieldLoadWithBakerReadBarrier(HInstruction * instruction,Location ref,CpuRegister obj,uint32_t offset,bool needs_null_check)7970 void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
7971                                                                 Location ref,
7972                                                                 CpuRegister obj,
7973                                                                 uint32_t offset,
7974                                                                 bool needs_null_check) {
7975   DCHECK(EmitBakerReadBarrier());
7976 
7977   // /* HeapReference<Object> */ ref = *(obj + offset)
7978   Address src(obj, offset);
7979   GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
7980 }
7981 
GenerateArrayLoadWithBakerReadBarrier(HInstruction * instruction,Location ref,CpuRegister obj,uint32_t data_offset,Location index,bool needs_null_check)7982 void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
7983                                                                 Location ref,
7984                                                                 CpuRegister obj,
7985                                                                 uint32_t data_offset,
7986                                                                 Location index,
7987                                                                 bool needs_null_check) {
7988   DCHECK(EmitBakerReadBarrier());
7989 
7990   static_assert(
7991       sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
7992       "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
7993   // /* HeapReference<Object> */ ref =
7994   //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
7995   Address src = CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset);
7996   GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
7997 }
7998 
GenerateReferenceLoadWithBakerReadBarrier(HInstruction * instruction,Location ref,CpuRegister obj,const Address & src,bool needs_null_check,bool always_update_field,CpuRegister * temp1,CpuRegister * temp2)7999 void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
8000                                                                     Location ref,
8001                                                                     CpuRegister obj,
8002                                                                     const Address& src,
8003                                                                     bool needs_null_check,
8004                                                                     bool always_update_field,
8005                                                                     CpuRegister* temp1,
8006                                                                     CpuRegister* temp2) {
8007   DCHECK(EmitBakerReadBarrier());
8008 
8009   // In slow path based read barriers, the read barrier call is
8010   // inserted after the original load. However, in fast path based
8011   // Baker's read barriers, we need to perform the load of
8012   // mirror::Object::monitor_ *before* the original reference load.
8013   // This load-load ordering is required by the read barrier.
8014   // The fast path/slow path (for Baker's algorithm) should look like:
8015   //
8016   //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
8017   //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
8018   //   HeapReference<Object> ref = *src;  // Original reference load.
8019   //   bool is_gray = (rb_state == ReadBarrier::GrayState());
8020   //   if (is_gray) {
8021   //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
8022   //   }
8023   //
8024   // Note: the original implementation in ReadBarrier::Barrier is
8025   // slightly more complex as:
8026   // - it implements the load-load fence using a data dependency on
8027   //   the high-bits of rb_state, which are expected to be all zeroes
8028   //   (we use CodeGeneratorX86_64::GenerateMemoryBarrier instead
8029   //   here, which is a no-op thanks to the x86-64 memory model);
8030   // - it performs additional checks that we do not do here for
8031   //   performance reasons.
8032 
8033   CpuRegister ref_reg = ref.AsRegister<CpuRegister>();
8034   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
8035 
8036   // Given the numeric representation, it's enough to check the low bit of the rb_state.
8037   static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
8038   static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
8039   constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
8040   constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
8041   constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
8042 
8043   // if (rb_state == ReadBarrier::GrayState())
8044   //   ref = ReadBarrier::Mark(ref);
8045   // At this point, just do the "if" and make sure that flags are preserved until the branch.
8046   __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
8047   if (needs_null_check) {
8048     MaybeRecordImplicitNullCheck(instruction);
8049   }
8050 
8051   // Load fence to prevent load-load reordering.
8052   // Note that this is a no-op, thanks to the x86-64 memory model.
8053   GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
8054 
8055   // The actual reference load.
8056   // /* HeapReference<Object> */ ref = *src
8057   __ movl(ref_reg, src);  // Flags are unaffected.
8058 
8059   // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch.
8060   // Slow path marking the object `ref` when it is gray.
8061   SlowPathCode* slow_path;
8062   if (always_update_field) {
8063     DCHECK(temp1 != nullptr);
8064     DCHECK(temp2 != nullptr);
8065     slow_path = new (GetScopedAllocator()) ReadBarrierMarkAndUpdateFieldSlowPathX86_64(
8066         instruction, ref, obj, src, /* unpoison_ref_before_marking= */ true, *temp1, *temp2);
8067   } else {
8068     slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathX86_64(
8069         instruction, ref, /* unpoison_ref_before_marking= */ true);
8070   }
8071   AddSlowPath(slow_path);
8072 
8073   // We have done the "if" of the gray bit check above, now branch based on the flags.
8074   __ j(kNotZero, slow_path->GetEntryLabel());
8075 
8076   // Object* ref = ref_addr->AsMirrorPtr()
8077   __ MaybeUnpoisonHeapReference(ref_reg);
8078 
8079   __ Bind(slow_path->GetExitLabel());
8080 }
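
// Illustrative sketch of the gray-bit test performed by the `testb` above: the
// read barrier state is a single bit of the lock word, so on little-endian
// x86-64 it can be probed with a one-byte test at the right offset without
// loading the whole monitor word. `rb_state_shift` stands in for
// LockWord::kReadBarrierStateShift.
[[maybe_unused]] static bool IsGraySketch(const uint8_t* raw_object,
                                          uint32_t monitor_offset,
                                          uint32_t rb_state_shift) {
  uint32_t byte_index = rb_state_shift / kBitsPerByte;
  uint8_t bit_mask = static_cast<uint8_t>(1u << (rb_state_shift % kBitsPerByte));
  return (raw_object[monitor_offset + byte_index] & bit_mask) != 0;
}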
8081 
GenerateReadBarrierSlow(HInstruction * instruction,Location out,Location ref,Location obj,uint32_t offset,Location index)8082 void CodeGeneratorX86_64::GenerateReadBarrierSlow(HInstruction* instruction,
8083                                                   Location out,
8084                                                   Location ref,
8085                                                   Location obj,
8086                                                   uint32_t offset,
8087                                                   Location index) {
8088   DCHECK(EmitReadBarrier());
8089 
8090   // Insert a slow path based read barrier *after* the reference load.
8091   //
8092   // If heap poisoning is enabled, the unpoisoning of the loaded
8093   // reference will be carried out by the runtime within the slow
8094   // path.
8095   //
8096   // Note that `ref` currently does not get unpoisoned (when heap
8097   // poisoning is enabled), which is alright as the `ref` argument is
8098   // not used by the artReadBarrierSlow entry point.
8099   //
8100   // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
8101   SlowPathCode* slow_path = new (GetScopedAllocator())
8102       ReadBarrierForHeapReferenceSlowPathX86_64(instruction, out, ref, obj, offset, index);
8103   AddSlowPath(slow_path);
8104 
8105   __ jmp(slow_path->GetEntryLabel());
8106   __ Bind(slow_path->GetExitLabel());
8107 }
8108 
MaybeGenerateReadBarrierSlow(HInstruction * instruction,Location out,Location ref,Location obj,uint32_t offset,Location index)8109 void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
8110                                                        Location out,
8111                                                        Location ref,
8112                                                        Location obj,
8113                                                        uint32_t offset,
8114                                                        Location index) {
8115   if (EmitReadBarrier()) {
8116     // Baker's read barriers shall be handled by the fast path
8117     // (CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier).
8118     DCHECK(!kUseBakerReadBarrier);
8119     // If heap poisoning is enabled, unpoisoning will be taken care of
8120     // by the runtime within the slow path.
8121     GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
8122   } else if (kPoisonHeapReferences) {
8123     __ UnpoisonHeapReference(out.AsRegister<CpuRegister>());
8124   }
8125 }
8126 
GenerateReadBarrierForRootSlow(HInstruction * instruction,Location out,Location root)8127 void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
8128                                                          Location out,
8129                                                          Location root) {
8130   DCHECK(EmitReadBarrier());
8131 
8132   // Insert a slow path based read barrier *after* the GC root load.
8133   //
8134   // Note that GC roots are not affected by heap poisoning, so we do
8135   // not need to do anything special for this here.
8136   SlowPathCode* slow_path =
8137       new (GetScopedAllocator()) ReadBarrierForRootSlowPathX86_64(instruction, out, root);
8138   AddSlowPath(slow_path);
8139 
8140   __ jmp(slow_path->GetEntryLabel());
8141   __ Bind(slow_path->GetExitLabel());
8142 }
8143 
VisitBoundType(HBoundType * instruction)8144 void LocationsBuilderX86_64::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
8145   // Nothing to do, this should be removed during prepare for register allocator.
8146   LOG(FATAL) << "Unreachable";
8147 }
8148 
VisitBoundType(HBoundType * instruction)8149 void InstructionCodeGeneratorX86_64::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
8150   // Nothing to do, this should be removed during prepare for register allocator.
8151   LOG(FATAL) << "Unreachable";
8152 }
8153 
8154 // Simple implementation of packed switch - generate cascaded compare/jumps.
VisitPackedSwitch(HPackedSwitch * switch_instr)8155 void LocationsBuilderX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
8156   LocationSummary* locations =
8157       new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
8158   locations->SetInAt(0, Location::RequiresRegister());
8159   locations->AddTemp(Location::RequiresRegister());
8160   locations->AddTemp(Location::RequiresRegister());
8161 }
8162 
VisitPackedSwitch(HPackedSwitch * switch_instr)8163 void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
8164   int32_t lower_bound = switch_instr->GetStartValue();
8165   uint32_t num_entries = switch_instr->GetNumEntries();
8166   LocationSummary* locations = switch_instr->GetLocations();
8167   CpuRegister value_reg_in = locations->InAt(0).AsRegister<CpuRegister>();
8168   CpuRegister temp_reg = locations->GetTemp(0).AsRegister<CpuRegister>();
8169   CpuRegister base_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
8170   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
8171 
8172   // Should we generate smaller inline compare/jumps?
8173   if (num_entries <= kPackedSwitchJumpTableThreshold) {
8174     // Figure out the correct compare values and jump conditions.
8175     // Handle the first compare/branch as a special case because it might
8176     // jump to the default case.
8177     DCHECK_GT(num_entries, 2u);
8178     Condition first_condition;
8179     uint32_t index;
8180     const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
8181     if (lower_bound != 0) {
8182       first_condition = kLess;
8183       __ cmpl(value_reg_in, Immediate(lower_bound));
8184       __ j(first_condition, codegen_->GetLabelOf(default_block));
8185       __ j(kEqual, codegen_->GetLabelOf(successors[0]));
8186 
8187       index = 1;
8188     } else {
8189       // Handle all the compare/jumps below.
8190       first_condition = kBelow;
8191       index = 0;
8192     }
8193 
8194     // Handle the rest of the compare/jumps.
8195     for (; index + 1 < num_entries; index += 2) {
8196       int32_t compare_to_value = lower_bound + index + 1;
8197       __ cmpl(value_reg_in, Immediate(compare_to_value));
8198       // Jump to successors[index] if value < case_value[index + 1] (i.e. == case_value[index]).
8199       __ j(first_condition, codegen_->GetLabelOf(successors[index]));
8200       // Jump to successors[index + 1] if value == case_value[index + 1].
8201       __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
8202     }
8203 
8204     if (index != num_entries) {
8205       // There is an odd number of entries. Handle the last one.
8206       DCHECK_EQ(index + 1, num_entries);
8207       __ cmpl(value_reg_in, Immediate(static_cast<int32_t>(lower_bound + index)));
8208       __ j(kEqual, codegen_->GetLabelOf(successors[index]));
8209     }
8210 
8211     // And the default for any other value.
8212     if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
8213       __ jmp(codegen_->GetLabelOf(default_block));
8214     }
8215     return;
8216   }
8217 
8218   // Remove the bias, if needed.
8219   Register value_reg_out = value_reg_in.AsRegister();
8220   if (lower_bound != 0) {
8221     __ leal(temp_reg, Address(value_reg_in, -lower_bound));
8222     value_reg_out = temp_reg.AsRegister();
8223   }
8224   CpuRegister value_reg(value_reg_out);
8225 
8226   // Is the value in range?
8227   __ cmpl(value_reg, Immediate(num_entries - 1));
8228   __ j(kAbove, codegen_->GetLabelOf(default_block));
8229 
8230   // We are in the range of the table.
8231   // Load the address of the jump table in the constant area.
8232   __ leaq(base_reg, codegen_->LiteralCaseTable(switch_instr));
8233 
8234   // Load the (signed) offset from the jump table.
8235   __ movsxd(temp_reg, Address(base_reg, value_reg, TIMES_4, 0));
8236 
8237   // Add the offset to the address of the table base.
8238   __ addq(temp_reg, base_reg);
8239 
8240   // And jump.
8241   __ jmp(temp_reg);
8242 }
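
// Illustrative sketch (plain C++) of the jump-table dispatch emitted above: the
// constant area holds one int32 offset per case, relative to the start of the
// table, and the final target is the table base plus that signed offset. The
// parameter names are placeholders for the sketch.
[[maybe_unused]] static const uint8_t* SwitchTargetSketch(const int32_t* table,
                                                          const uint8_t* table_base,
                                                          int32_t value,
                                                          int32_t lower_bound,
                                                          uint32_t num_entries,
                                                          const uint8_t* default_target) {
  uint32_t index = static_cast<uint32_t>(value - lower_bound);  // Remove the bias.
  if (index >= num_entries) {
    return default_target;  // Out of range; matches the kAbove branch above.
  }
  return table_base + table[index];  // movsxd + addq in the generated code.
}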
8243 
VisitIntermediateAddress(HIntermediateAddress * instruction)8244 void LocationsBuilderX86_64::VisitIntermediateAddress(
8245     [[maybe_unused]] HIntermediateAddress* instruction) {
8246   LOG(FATAL) << "Unreachable";
8247 }
8248 
VisitIntermediateAddress(HIntermediateAddress * instruction)8249 void InstructionCodeGeneratorX86_64::VisitIntermediateAddress(
8250     [[maybe_unused]] HIntermediateAddress* instruction) {
8251   LOG(FATAL) << "Unreachable";
8252 }
8253 
Load32BitValue(CpuRegister dest,int32_t value)8254 void CodeGeneratorX86_64::Load32BitValue(CpuRegister dest, int32_t value) {
8255   if (value == 0) {
8256     __ xorl(dest, dest);
8257   } else {
8258     __ movl(dest, Immediate(value));
8259   }
8260 }
8261 
Load64BitValue(CpuRegister dest,int64_t value)8262 void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) {
8263   if (value == 0) {
8264     // Clears upper bits too.
8265     __ xorl(dest, dest);
8266   } else if (IsUint<32>(value)) {
8267     // We can use a 32-bit move, as it will zero-extend and is shorter.
8268     __ movl(dest, Immediate(static_cast<int32_t>(value)));
8269   } else {
8270     __ movq(dest, Immediate(value));
8271   }
8272 }
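
// Sketch of the encoding choice above: writes through `xorl`/`movl` clear the
// upper 32 bits of the destination register, so any value representable as an
// unsigned 32-bit quantity can use the shorter 32-bit forms; only the rest
// need the long `movq` with a full 64-bit immediate.
[[maybe_unused]] static bool NeedsMovq64Sketch(int64_t value) {
  return static_cast<uint64_t>(value) > 0xFFFFFFFFu;  // i.e. !IsUint<32>(value)
}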
8273 
Load32BitValue(XmmRegister dest,int32_t value)8274 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, int32_t value) {
8275   if (value == 0) {
8276     __ xorps(dest, dest);
8277   } else {
8278     __ movss(dest, LiteralInt32Address(value));
8279   }
8280 }
8281 
Load64BitValue(XmmRegister dest,int64_t value)8282 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, int64_t value) {
8283   if (value == 0) {
8284     __ xorpd(dest, dest);
8285   } else {
8286     __ movsd(dest, LiteralInt64Address(value));
8287   }
8288 }
8289 
Load32BitValue(XmmRegister dest,float value)8290 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, float value) {
8291   Load32BitValue(dest, bit_cast<int32_t, float>(value));
8292 }
8293 
Load64BitValue(XmmRegister dest,double value)8294 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, double value) {
8295   Load64BitValue(dest, bit_cast<int64_t, double>(value));
8296 }
8297 
Compare32BitValue(CpuRegister dest,int32_t value)8298 void CodeGeneratorX86_64::Compare32BitValue(CpuRegister dest, int32_t value) {
8299   if (value == 0) {
8300     __ testl(dest, dest);
8301   } else {
8302     __ cmpl(dest, Immediate(value));
8303   }
8304 }
8305 
Compare64BitValue(CpuRegister dest,int64_t value)8306 void CodeGeneratorX86_64::Compare64BitValue(CpuRegister dest, int64_t value) {
8307   if (IsInt<32>(value)) {
8308     if (value == 0) {
8309       __ testq(dest, dest);
8310     } else {
8311       __ cmpq(dest, Immediate(static_cast<int32_t>(value)));
8312     }
8313   } else {
8314     // Value won't fit in a 32-bit immediate.
8315     __ cmpq(dest, LiteralInt64Address(value));
8316   }
8317 }
8318 
GenerateIntCompare(Location lhs,Location rhs)8319 void CodeGeneratorX86_64::GenerateIntCompare(Location lhs, Location rhs) {
8320   CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>();
8321   GenerateIntCompare(lhs_reg, rhs);
8322 }
8323 
GenerateIntCompare(CpuRegister lhs,Location rhs)8324 void CodeGeneratorX86_64::GenerateIntCompare(CpuRegister lhs, Location rhs) {
8325   if (rhs.IsConstant()) {
8326     int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
8327     Compare32BitValue(lhs, value);
8328   } else if (rhs.IsStackSlot()) {
8329     __ cmpl(lhs, Address(CpuRegister(RSP), rhs.GetStackIndex()));
8330   } else {
8331     __ cmpl(lhs, rhs.AsRegister<CpuRegister>());
8332   }
8333 }
8334 
GenerateLongCompare(Location lhs,Location rhs)8335 void CodeGeneratorX86_64::GenerateLongCompare(Location lhs, Location rhs) {
8336   CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>();
8337   if (rhs.IsConstant()) {
8338     int64_t value = rhs.GetConstant()->AsLongConstant()->GetValue();
8339     Compare64BitValue(lhs_reg, value);
8340   } else if (rhs.IsDoubleStackSlot()) {
8341     __ cmpq(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
8342   } else {
8343     __ cmpq(lhs_reg, rhs.AsRegister<CpuRegister>());
8344   }
8345 }
8346 
Address CodeGeneratorX86_64::ArrayAddress(CpuRegister obj,
                                          Location index,
                                          ScaleFactor scale,
                                          uint32_t data_offset) {
  return index.IsConstant()
      ? Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset)
      : Address(obj, index.AsRegister<CpuRegister>(), scale, data_offset);
}

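// Stores a 64-bit constant to a stack slot. Values that fit in a sign-extended
// 32-bit immediate are stored directly; otherwise the constant is materialized
// in TMP first.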
void CodeGeneratorX86_64::Store64BitValueToStack(Location dest, int64_t value) {
  DCHECK(dest.IsDoubleStackSlot());
  if (IsInt<32>(value)) {
    // Can move directly as an int32 constant.
    __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()),
            Immediate(static_cast<int32_t>(value)));
  } else {
    Load64BitValue(CpuRegister(TMP), value);
    __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()), CpuRegister(TMP));
  }
}

/**
 * Class to handle late fixup of offsets into constant area.
 */
class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
 public:
  RIPFixup(CodeGeneratorX86_64& codegen, size_t offset)
      : codegen_(&codegen), offset_into_constant_area_(offset) {}

 protected:
  void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }

  CodeGeneratorX86_64* codegen_;

 private:
  void Process(const MemoryRegion& region, int pos) override {
    // Patch the correct offset for the instruction.  We use the address of the
    // 'next' instruction, which is 'pos' (patch the 4 bytes before).
    int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
    int32_t relative_position = constant_offset - pos;

    // Patch in the right value.
    region.StoreUnaligned<int32_t>(pos - 4, relative_position);
  }

  // Location in constant area that the fixup refers to.
  size_t offset_into_constant_area_;
};

/**
 * Class to handle late fixup of offsets to a jump table that will be created in the
 * constant area.
 */
class JumpTableRIPFixup : public RIPFixup {
 public:
  JumpTableRIPFixup(CodeGeneratorX86_64& codegen, HPackedSwitch* switch_instr)
      : RIPFixup(codegen, -1), switch_instr_(switch_instr) {}

  void CreateJumpTable() {
    X86_64Assembler* assembler = codegen_->GetAssembler();

    // Ensure that the reference to the jump table has the correct offset.
    const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
    SetOffset(offset_in_constant_table);

    // Compute the offset from the start of the function to this jump table.
    const int32_t current_table_offset = assembler->CodeSize() + offset_in_constant_table;

    // Populate the jump table with the target offsets of the switch successors.
    int32_t num_entries = switch_instr_->GetNumEntries();
    HBasicBlock* block = switch_instr_->GetBlock();
    const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
    // The value that we want is the target offset - the position of the table.
    for (int32_t i = 0; i < num_entries; i++) {
      HBasicBlock* b = successors[i];
      Label* l = codegen_->GetLabelOf(b);
      DCHECK(l->IsBound());
      int32_t offset_to_block = l->Position() - current_table_offset;
      assembler->AppendInt32(offset_to_block);
    }
  }

 private:
  const HPackedSwitch* switch_instr_;
};

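// Emits the constant area (literals and jump tables) after the method code,
// then hands off to the base class for the common finalization steps.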
void CodeGeneratorX86_64::Finalize() {
  // Generate the constant area if needed.
  X86_64Assembler* assembler = GetAssembler();
  if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
    // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8 byte values.
    assembler->Align(4, 0);
    constant_area_start_ = assembler->CodeSize();

    // Populate any jump tables.
    for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) {
      jump_table->CreateJumpTable();
    }

    // And now add the constant area to the generated code.
    assembler->AddConstantArea();
  }

  // And finish up.
  CodeGenerator::Finalize();
}

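// The Literal*Address helpers below return RIP-relative addresses into the
// constant area; the 32-bit displacement is patched by a RIPFixup once the
// constant area position is known.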
Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) {
  AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddDouble(v));
  return Address::RIP(fixup);
}

Address CodeGeneratorX86_64::LiteralFloatAddress(float v) {
  AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddFloat(v));
  return Address::RIP(fixup);
}

Address CodeGeneratorX86_64::LiteralInt32Address(int32_t v) {
  AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddInt32(v));
  return Address::RIP(fixup);
}

Address CodeGeneratorX86_64::LiteralInt64Address(int64_t v) {
  AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddInt64(v));
  return Address::RIP(fixup);
}

// TODO: trg as memory.
void CodeGeneratorX86_64::MoveFromReturnRegister(Location trg, DataType::Type type) {
  if (!trg.IsValid()) {
    DCHECK_EQ(type, DataType::Type::kVoid);
    return;
  }

  DCHECK_NE(type, DataType::Type::kVoid);

  Location return_loc = InvokeDexCallingConventionVisitorX86_64().GetReturnLocation(type);
  if (trg.Equals(return_loc)) {
    return;
  }

  // Let the parallel move resolver take care of all of this.
  HParallelMove parallel_move(GetGraph()->GetAllocator());
  parallel_move.AddMove(return_loc, trg, type, nullptr);
  GetMoveResolver()->EmitNativeCode(&parallel_move);
}

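// Registers a jump-table fixup for the given packed switch; the table itself is
// emitted into the constant area during Finalize().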
Address CodeGeneratorX86_64::LiteralCaseTable(HPackedSwitch* switch_instr) {
  // Create a fixup to be used to create and address the jump table.
  JumpTableRIPFixup* table_fixup =
      new (GetGraph()->GetAllocator()) JumpTableRIPFixup(*this, switch_instr);

  // We have to populate the jump tables.
  fixups_to_jump_tables_.push_back(table_fixup);
  return Address::RIP(table_fixup);
}

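// Stores a 64-bit constant to memory. The implicit null check, if any, is
// recorded on the first emitted store, since that is the instruction that can
// fault.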
void CodeGeneratorX86_64::MoveInt64ToAddress(const Address& addr_low,
                                             const Address& addr_high,
                                             int64_t v,
                                             HInstruction* instruction) {
  if (IsInt<32>(v)) {
    int32_t v_32 = v;
    __ movq(addr_low, Immediate(v_32));
    MaybeRecordImplicitNullCheck(instruction);
  } else {
    // Doesn't fit in a 32-bit immediate.  Store it in two 32-bit pieces.
    int32_t low_v = Low32Bits(v);
    int32_t high_v = High32Bits(v);
    __ movl(addr_low, Immediate(low_v));
    MaybeRecordImplicitNullCheck(instruction);
    __ movl(addr_high, Immediate(high_v));
  }
}

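// Patches the 32-bit address of a JIT GC root table entry into code previously
// emitted for a string or class literal load.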
void CodeGeneratorX86_64::PatchJitRootUse(uint8_t* code,
                                          const uint8_t* roots_data,
                                          const PatchInfo<Label>& info,
                                          uint64_t index_in_table) const {
  uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
  uintptr_t address =
      reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
  using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t;
  reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
      dchecked_integral_cast<uint32_t>(address);
}

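// Resolves every recorded JIT string and class patch to its slot in the root
// table and patches the emitted code accordingly.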
void CodeGeneratorX86_64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
  for (const PatchInfo<Label>& info : jit_string_patches_) {
    StringReference string_reference(info.target_dex_file, dex::StringIndex(info.offset_or_index));
    uint64_t index_in_table = GetJitStringRootIndex(string_reference);
    PatchJitRootUse(code, roots_data, info, index_in_table);
  }

  for (const PatchInfo<Label>& info : jit_class_patches_) {
    TypeReference type_reference(info.target_dex_file, dex::TypeIndex(info.offset_or_index));
    uint64_t index_in_table = GetJitClassRootIndex(type_reference);
    PatchJitRootUse(code, roots_data, info, index_in_table);
  }
}

bool LocationsBuilderX86_64::CpuHasAvxFeatureFlag() {
  return codegen_->GetInstructionSetFeatures().HasAVX();
}

bool LocationsBuilderX86_64::CpuHasAvx2FeatureFlag() {
  return codegen_->GetInstructionSetFeatures().HasAVX2();
}

bool InstructionCodeGeneratorX86_64::CpuHasAvxFeatureFlag() {
  return codegen_->GetInstructionSetFeatures().HasAVX();
}

bool InstructionCodeGeneratorX86_64::CpuHasAvx2FeatureFlag() {
  return codegen_->GetInstructionSetFeatures().HasAVX2();
}

void LocationsBuilderX86_64::VisitBitwiseNegatedRight(
    [[maybe_unused]] HBitwiseNegatedRight* instruction) {
  LOG(FATAL) << "Unimplemented";
}

void InstructionCodeGeneratorX86_64::VisitBitwiseNegatedRight(
    [[maybe_unused]] HBitwiseNegatedRight* instruction) {
  LOG(FATAL) << "Unimplemented";
}

#undef __

}  // namespace x86_64
}  // namespace art