/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "code_generator_x86_64.h"

#include "art_method.h"
#include "code_generator_utils.h"
#include "compiled_method.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "gc/accounting/card_table.h"
#include "intrinsics.h"
#include "intrinsics_x86_64.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
#include "mirror/object_reference.h"
#include "thread.h"
#include "utils/assembler.h"
#include "utils/stack_checks.h"
#include "utils/x86_64/assembler_x86_64.h"
#include "utils/x86_64/managed_register_x86_64.h"

namespace art {

template<class MirrorType>
class GcRoot;

namespace x86_64 {

static constexpr int kCurrentMethodStackOffset = 0;
static constexpr Register kMethodRegisterArgument = RDI;
// The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump
// table version generates 7 instructions and num_entries literals. The compare/jump sequence
// generates less code/data when num_entries is small.
static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;

static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 };
static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 };

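// Mask for the C2 condition flag (bit 10) of the x87 FPU status word; it is tested to decide
// whether an fprem reduction step has to be repeated.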
static constexpr int kC2ConditionMask = 0x400;

#define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())->
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, x).Int32Value()

class NullCheckSlowPathX86_64 : public SlowPathCode {
 public:
  explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    if (instruction_->CanThrowIntoCatchBlock()) {
      // Live registers will be restored in the catch block if caught.
      SaveLiveRegisters(codegen, instruction_->GetLocations());
    }
    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowNullPointer),
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
    CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
  }

  bool IsFatal() const OVERRIDE { return true; }

  const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86_64);
};

class DivZeroCheckSlowPathX86_64 : public SlowPathCode {
 public:
  explicit DivZeroCheckSlowPathX86_64(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    if (instruction_->CanThrowIntoCatchBlock()) {
      // Live registers will be restored in the catch block if caught.
      SaveLiveRegisters(codegen, instruction_->GetLocations());
    }
    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowDivZero),
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
    CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
  }

  bool IsFatal() const OVERRIDE { return true; }

  const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86_64);
};

class DivRemMinusOneSlowPathX86_64 : public SlowPathCode {
 public:
  DivRemMinusOneSlowPathX86_64(HInstruction* at, Register reg, Primitive::Type type, bool is_div)
      : SlowPathCode(at), cpu_reg_(CpuRegister(reg)), type_(type), is_div_(is_div) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    __ Bind(GetEntryLabel());
    if (type_ == Primitive::kPrimInt) {
      if (is_div_) {
        __ negl(cpu_reg_);
      } else {
        __ xorl(cpu_reg_, cpu_reg_);
      }

    } else {
      DCHECK_EQ(Primitive::kPrimLong, type_);
      if (is_div_) {
        __ negq(cpu_reg_);
      } else {
        __ xorl(cpu_reg_, cpu_reg_);
      }
    }
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "DivRemMinusOneSlowPathX86_64"; }

 private:
  const CpuRegister cpu_reg_;
  const Primitive::Type type_;
  const bool is_div_;
  DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86_64);
};

class SuspendCheckSlowPathX86_64 : public SlowPathCode {
 public:
  SuspendCheckSlowPathX86_64(HSuspendCheck* instruction, HBasicBlock* successor)
      : SlowPathCode(instruction), successor_(successor) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, instruction_->GetLocations());
    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend),
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
    CheckEntrypointTypes<kQuickTestSuspend, void, void>();
    RestoreLiveRegisters(codegen, instruction_->GetLocations());
    if (successor_ == nullptr) {
      __ jmp(GetReturnLabel());
    } else {
      __ jmp(x86_64_codegen->GetLabelOf(successor_));
    }
  }

  Label* GetReturnLabel() {
    DCHECK(successor_ == nullptr);
    return &return_label_;
  }

  HBasicBlock* GetSuccessor() const {
    return successor_;
  }

  const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathX86_64"; }

 private:
  HBasicBlock* const successor_;
  Label return_label_;

  DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86_64);
};

class BoundsCheckSlowPathX86_64 : public SlowPathCode {
 public:
  explicit BoundsCheckSlowPathX86_64(HBoundsCheck* instruction)
    : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    if (instruction_->CanThrowIntoCatchBlock()) {
      // Live registers will be restored in the catch block if caught.
      SaveLiveRegisters(codegen, instruction_->GetLocations());
    }
    // We're moving two locations to locations that could overlap, so we need a parallel
    // move resolver.
    InvokeRuntimeCallingConvention calling_convention;
    codegen->EmitParallelMoves(
        locations->InAt(0),
        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
        Primitive::kPrimInt,
        locations->InAt(1),
        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
        Primitive::kPrimInt);
    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowArrayBounds),
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
    CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
  }

  bool IsFatal() const OVERRIDE { return true; }

  const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86_64);
};

class LoadClassSlowPathX86_64 : public SlowPathCode {
 public:
  LoadClassSlowPathX86_64(HLoadClass* cls,
                          HInstruction* at,
                          uint32_t dex_pc,
                          bool do_clinit)
      : SlowPathCode(at), cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) {
    DCHECK(at->IsLoadClass() || at->IsClinitCheck());
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = at_->GetLocations();
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());

    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(cls_->GetTypeIndex()));
    x86_64_codegen->InvokeRuntime(do_clinit_ ?
                                      QUICK_ENTRY_POINT(pInitializeStaticStorage) :
                                      QUICK_ENTRY_POINT(pInitializeType),
                                  at_,
                                  dex_pc_,
                                  this);
    if (do_clinit_) {
      CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
    } else {
      CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
    }

    Location out = locations->Out();
    // Move the class to the desired location.
    if (out.IsValid()) {
      DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      x86_64_codegen->Move(out, Location::RegisterLocation(RAX));
    }

    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathX86_64"; }

 private:
  // The class this slow path will load.
  HLoadClass* const cls_;

  // The instruction where this slow path is happening.
  // (Might be the load class or an initialization check).
  HInstruction* const at_;

  // The dex PC of `at_`.
  const uint32_t dex_pc_;

  // Whether to initialize the class.
  const bool do_clinit_;

  DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86_64);
};

class LoadStringSlowPathX86_64 : public SlowPathCode {
 public:
  explicit LoadStringSlowPathX86_64(HLoadString* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));

    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex();
    __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(string_index));
    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pResolveString),
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
    CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
    x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86_64);
};

class TypeCheckSlowPathX86_64 : public SlowPathCode {
 public:
  TypeCheckSlowPathX86_64(HInstruction* instruction, bool is_fatal)
      : SlowPathCode(instruction), is_fatal_(is_fatal) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    Location object_class = instruction_->IsCheckCast() ? locations->GetTemp(0)
                                                        : locations->Out();
    uint32_t dex_pc = instruction_->GetDexPc();
    DCHECK(instruction_->IsCheckCast()
           || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));

    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());

    if (!is_fatal_) {
      SaveLiveRegisters(codegen, locations);
    }

    // We're moving two locations to locations that could overlap, so we need a parallel
    // move resolver.
    InvokeRuntimeCallingConvention calling_convention;
    codegen->EmitParallelMoves(
        locations->InAt(1),
        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
        Primitive::kPrimNot,
        object_class,
        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
        Primitive::kPrimNot);

    if (instruction_->IsInstanceOf()) {
      x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial),
                                    instruction_,
                                    dex_pc,
                                    this);
      CheckEntrypointTypes<
          kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>();
    } else {
      DCHECK(instruction_->IsCheckCast());
      x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
                                    instruction_,
                                    dex_pc,
                                    this);
      CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>();
    }

    if (!is_fatal_) {
      if (instruction_->IsInstanceOf()) {
        x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
      }

      RestoreLiveRegisters(codegen, locations);
      __ jmp(GetExitLabel());
    }
  }

  const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathX86_64"; }

  bool IsFatal() const OVERRIDE { return is_fatal_; }

 private:
  const bool is_fatal_;

  DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86_64);
};

class DeoptimizationSlowPathX86_64 : public SlowPathCode {
 public:
  explicit DeoptimizationSlowPathX86_64(HDeoptimize* instruction)
      : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, instruction_->GetLocations());
    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize),
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
    CheckEntrypointTypes<kQuickDeoptimize, void, void>();
  }

  const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64);
};

class ArraySetSlowPathX86_64 : public SlowPathCode {
 public:
  explicit ArraySetSlowPathX86_64(HInstruction* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
    parallel_move.AddMove(
        locations->InAt(0),
        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
        Primitive::kPrimNot,
        nullptr);
    parallel_move.AddMove(
        locations->InAt(1),
        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
        Primitive::kPrimInt,
        nullptr);
    parallel_move.AddMove(
        locations->InAt(2),
        Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
        Primitive::kPrimNot,
        nullptr);
    codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);

    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject),
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
    CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64);
};

// Slow path marking an object during a read barrier.
class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
 public:
  ReadBarrierMarkSlowPathX86_64(HInstruction* instruction, Location out, Location obj)
      : SlowPathCode(instruction), out_(out), obj_(obj) {
    DCHECK(kEmitCompilerReadBarrier);
  }

  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathX86_64"; }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    Register reg_out = out_.AsRegister<Register>();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
    DCHECK(instruction_->IsInstanceFieldGet() ||
           instruction_->IsStaticFieldGet() ||
           instruction_->IsArrayGet() ||
           instruction_->IsLoadClass() ||
           instruction_->IsLoadString() ||
           instruction_->IsInstanceOf() ||
           instruction_->IsCheckCast())
        << "Unexpected instruction in read barrier marking slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), obj_);
    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierMark),
                               instruction_,
                               instruction_->GetDexPc(),
                               this);
    CheckEntrypointTypes<kQuickReadBarrierMark, mirror::Object*, mirror::Object*>();
    x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));

    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

 private:
  const Location out_;
  const Location obj_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64);
};

// Slow path generating a read barrier for a heap reference.
class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
 public:
  ReadBarrierForHeapReferenceSlowPathX86_64(HInstruction* instruction,
                                            Location out,
                                            Location ref,
                                            Location obj,
                                            uint32_t offset,
                                            Location index)
      : SlowPathCode(instruction),
        out_(out),
        ref_(ref),
        obj_(obj),
        offset_(offset),
        index_(index) {
    DCHECK(kEmitCompilerReadBarrier);
    // If `obj` is equal to `out` or `ref`, it means the initial
    // object has been overwritten by (or after) the heap object
    // reference load to be instrumented, e.g.:
    //
    //   __ movl(out, Address(out, offset));
    //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
    //
    // In that case, we have lost the information about the original
    // object, and the emitted read barrier cannot work properly.
    DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
    DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    LocationSummary* locations = instruction_->GetLocations();
    CpuRegister reg_out = out_.AsRegister<CpuRegister>();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_;
    DCHECK(!instruction_->IsInvoke() ||
           (instruction_->IsInvokeStaticOrDirect() &&
            instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier for heap reference slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    // We may have to change the index's value, but as `index_` is a
    // constant member (like other "inputs" of this slow path),
    // introduce a copy of it, `index`.
    Location index = index_;
    if (index_.IsValid()) {
      // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject.
      if (instruction_->IsArrayGet()) {
        // Compute real offset and store it in index_.
        Register index_reg = index_.AsRegister<CpuRegister>().AsRegister();
        DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
        if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
          // We are about to change the value of `index_reg` (see the
          // calls to art::x86_64::X86_64Assembler::shll and
          // art::x86_64::X86_64Assembler::AddImmediate below), but it
          // has not been saved by the previous call to
          // art::SlowPathCode::SaveLiveRegisters, as it is a
          // callee-save register --
          // art::SlowPathCode::SaveLiveRegisters does not consider
          // callee-save registers, as it has been designed with the
          // assumption that callee-save registers are supposed to be
          // handled by the called function.  So, as a callee-save
          // register, `index_reg` _would_ eventually be saved onto
          // the stack, but it would be too late: we would have
          // changed its value earlier.  Therefore, we manually save
          // it here into another freely available register,
          // `free_reg`, chosen of course among the caller-save
          // registers (as a callee-save `free_reg` register would
          // exhibit the same problem).
          //
          // Note we could have requested a temporary register from
          // the register allocator instead; but we prefer not to, as
          // this is a slow path, and we know we can find a
          // caller-save register that is available.
          Register free_reg = FindAvailableCallerSaveRegister(codegen).AsRegister();
          __ movl(CpuRegister(free_reg), CpuRegister(index_reg));
          index_reg = free_reg;
          index = Location::RegisterLocation(index_reg);
        } else {
          // The initial register stored in `index_` has already been
          // saved in the call to art::SlowPathCode::SaveLiveRegisters
          // (as it is not a callee-save register), so we can freely
          // use it.
        }
        // Shifting the index value contained in `index_reg` by the
        // scale factor (2) cannot overflow in practice, as the
        // runtime is unable to allocate object arrays with a size
        // larger than 2^26 - 1 (that is, 2^28 - 4 bytes).
        __ shll(CpuRegister(index_reg), Immediate(TIMES_4));
        static_assert(
            sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
            "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
        __ AddImmediate(CpuRegister(index_reg), Immediate(offset_));
      } else {
        DCHECK(instruction_->IsInvoke());
        DCHECK(instruction_->GetLocations()->Intrinsified());
        DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
               (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
            << instruction_->AsInvoke()->GetIntrinsic();
        DCHECK_EQ(offset_, 0U);
        DCHECK(index_.IsRegister());
      }
    }

    // We're moving two or three locations to locations that could
    // overlap, so we need a parallel move resolver.
    InvokeRuntimeCallingConvention calling_convention;
    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
    parallel_move.AddMove(ref_,
                          Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
                          Primitive::kPrimNot,
                          nullptr);
    parallel_move.AddMove(obj_,
                          Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
                          Primitive::kPrimNot,
                          nullptr);
    if (index.IsValid()) {
      parallel_move.AddMove(index,
                            Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
                            Primitive::kPrimInt,
                            nullptr);
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
    } else {
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
      __ movl(CpuRegister(calling_convention.GetRegisterAt(2)), Immediate(offset_));
    }
    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierSlow),
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
    CheckEntrypointTypes<
        kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
    x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));

    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE {
    return "ReadBarrierForHeapReferenceSlowPathX86_64";
  }

 private:
  CpuRegister FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
    size_t ref = static_cast<int>(ref_.AsRegister<CpuRegister>().AsRegister());
    size_t obj = static_cast<int>(obj_.AsRegister<CpuRegister>().AsRegister());
    for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
      if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
        return static_cast<CpuRegister>(i);
      }
    }
    // We shall never fail to find a free caller-save register, as
    // there are more than two core caller-save registers on x86-64
    // (meaning it is possible to find one which is different from
    // `ref` and `obj`).
    DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
    LOG(FATAL) << "Could not find a free caller-save register";
    UNREACHABLE();
  }

  const Location out_;
  const Location ref_;
  const Location obj_;
  const uint32_t offset_;
  // An additional location containing an index to an array.
  // Only used for HArrayGet and the UnsafeGetObject &
  // UnsafeGetObjectVolatile intrinsics.
  const Location index_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86_64);
};

// Slow path generating a read barrier for a GC root.
class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode {
 public:
  ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root)
      : SlowPathCode(instruction), out_(out), root_(root) {
    DCHECK(kEmitCompilerReadBarrier);
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
        << "Unexpected instruction in read barrier for GC root slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierForRootSlow),
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
    CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
    x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));

    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathX86_64"; }

 private:
  const Location out_;
  const Location root_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86_64);
};

#undef __
#define __ down_cast<X86_64Assembler*>(GetAssembler())->

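// Maps an integer condition to the equivalent x86_64 condition.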
inline Condition X86_64IntegerCondition(IfCondition cond) {
  switch (cond) {
    case kCondEQ: return kEqual;
    case kCondNE: return kNotEqual;
    case kCondLT: return kLess;
    case kCondLE: return kLessEqual;
    case kCondGT: return kGreater;
    case kCondGE: return kGreaterEqual;
    case kCondB:  return kBelow;
    case kCondBE: return kBelowEqual;
    case kCondA:  return kAbove;
    case kCondAE: return kAboveEqual;
  }
  LOG(FATAL) << "Unreachable";
  UNREACHABLE();
}

// Maps FP condition to x86_64 name.
inline Condition X86_64FPCondition(IfCondition cond) {
  switch (cond) {
    case kCondEQ: return kEqual;
    case kCondNE: return kNotEqual;
    case kCondLT: return kBelow;
    case kCondLE: return kBelowEqual;
    case kCondGT: return kAbove;
    case kCondGE: return kAboveEqual;
    default:      break;  // should not happen
  };
  LOG(FATAL) << "Unreachable";
  UNREACHABLE();
}

HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStaticOrDirectDispatch(
      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
      MethodReference target_method ATTRIBUTE_UNUSED) {
  switch (desired_dispatch_info.code_ptr_location) {
    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
      // For direct code, we actually prefer to call via the code pointer from ArtMethod*.
      return HInvokeStaticOrDirect::DispatchInfo {
        desired_dispatch_info.method_load_kind,
        HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
        desired_dispatch_info.method_load_data,
        0u
      };
    default:
      return desired_dispatch_info;
  }
}

void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
                                                     Location temp) {
  // All registers are assumed to be correctly set up.

  Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
  switch (invoke->GetMethodLoadKind()) {
    case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
      // temp = thread->string_init_entrypoint
      __ gs()->movq(temp.AsRegister<CpuRegister>(),
                    Address::Absolute(invoke->GetStringInitOffset(), /* no_rip */ true));
      break;
    case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
      callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
      break;
    case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
      __ movq(temp.AsRegister<CpuRegister>(), Immediate(invoke->GetMethodAddress()));
      break;
    case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup:
      __ movl(temp.AsRegister<CpuRegister>(), Immediate(0));  // Placeholder.
      method_patches_.emplace_back(invoke->GetTargetMethod());
      __ Bind(&method_patches_.back().label);  // Bind the label at the end of the "movl" insn.
      break;
    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
      __ movq(temp.AsRegister<CpuRegister>(),
              Address::Absolute(kDummy32BitOffset, /* no_rip */ false));
      // Bind a new fixup label at the end of the "movl" insn.
      uint32_t offset = invoke->GetDexCacheArrayOffset();
      __ Bind(NewPcRelativeDexCacheArrayPatch(*invoke->GetTargetMethod().dex_file, offset));
      break;
    }
    case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
      Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
      Register method_reg;
      CpuRegister reg = temp.AsRegister<CpuRegister>();
      if (current_method.IsRegister()) {
        method_reg = current_method.AsRegister<Register>();
      } else {
        DCHECK(invoke->GetLocations()->Intrinsified());
        DCHECK(!current_method.IsValid());
        method_reg = reg.AsRegister();
        __ movq(reg, Address(CpuRegister(RSP), kCurrentMethodStackOffset));
      }
      // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_;
      __ movq(reg,
              Address(CpuRegister(method_reg),
                      ArtMethod::DexCacheResolvedMethodsOffset(kX86_64PointerSize).SizeValue()));
      // temp = temp[index_in_cache];
      // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file.
      uint32_t index_in_cache = invoke->GetDexMethodIndex();
      __ movq(reg, Address(reg, CodeGenerator::GetCachePointerOffset(index_in_cache)));
      break;
    }
  }

  switch (invoke->GetCodePtrLocation()) {
    case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
      __ call(&frame_entry_label_);
      break;
    case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: {
      relative_call_patches_.emplace_back(invoke->GetTargetMethod());
      Label* label = &relative_call_patches_.back().label;
      __ call(label);  // Bind to the patch label, override at link time.
      __ Bind(label);  // Bind the label at the end of the "call" insn.
      break;
    }
    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
      // Filtered out by GetSupportedInvokeStaticOrDirectDispatch().
      LOG(FATAL) << "Unsupported";
      UNREACHABLE();
    case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
      // (callee_method + offset_of_quick_compiled_code)()
      __ call(Address(callee_method.AsRegister<CpuRegister>(),
                      ArtMethod::EntryPointFromQuickCompiledCodeOffset(
                          kX86_64WordSize).SizeValue()));
      break;
  }

  DCHECK(!IsLeafMethod());
}

void CodeGeneratorX86_64::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_in) {
  CpuRegister temp = temp_in.AsRegister<CpuRegister>();
  size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
      invoke->GetVTableIndex(), kX86_64PointerSize).SizeValue();

  // Use the calling convention instead of the location of the receiver, as
  // intrinsics may have put the receiver in a different register. In the intrinsics
  // slow path, the arguments have been moved to the right place, so here we are
  // guaranteed that the receiver is the first register of the calling convention.
  InvokeDexCallingConvention calling_convention;
  Register receiver = calling_convention.GetRegisterAt(0);

  size_t class_offset = mirror::Object::ClassOffset().SizeValue();
  // /* HeapReference<Class> */ temp = receiver->klass_
  __ movl(temp, Address(CpuRegister(receiver), class_offset));
  MaybeRecordImplicitNullCheck(invoke);
  // Instead of simply (possibly) unpoisoning `temp` here, we should
  // emit a read barrier for the previous class reference load.
  // However this is not required in practice, as this is an
  // intermediate/temporary reference and because the current
  // concurrent copying collector keeps the from-space memory
  // intact/accessible until the end of the marking phase (the
  // concurrent copying collector may not do so in the future).
  __ MaybeUnpoisonHeapReference(temp);
  // temp = temp->GetMethodAt(method_offset);
  __ movq(temp, Address(temp, method_offset));
  // call temp->GetEntryPoint();
  __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(
      kX86_64WordSize).SizeValue()));
}

void CodeGeneratorX86_64::RecordSimplePatch() {
  if (GetCompilerOptions().GetIncludePatchInformation()) {
    simple_patches_.emplace_back();
    __ Bind(&simple_patches_.back());
  }
}

void CodeGeneratorX86_64::RecordStringPatch(HLoadString* load_string) {
  string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex());
  __ Bind(&string_patches_.back().label);
}

Label* CodeGeneratorX86_64::NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
                                                            uint32_t element_offset) {
  // Add a patch entry and return the label.
  pc_relative_dex_cache_patches_.emplace_back(dex_file, element_offset);
  return &pc_relative_dex_cache_patches_.back().label;
}

void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
  DCHECK(linker_patches->empty());
  size_t size =
      method_patches_.size() +
      relative_call_patches_.size() +
      pc_relative_dex_cache_patches_.size() +
      simple_patches_.size() +
      string_patches_.size();
  linker_patches->reserve(size);
  // The label points to the end of the "movl" insn but the literal offset for method
  // patch needs to point to the embedded constant which occupies the last 4 bytes.
  constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
  for (const MethodPatchInfo<Label>& info : method_patches_) {
    uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
    linker_patches->push_back(LinkerPatch::MethodPatch(literal_offset,
                                                       info.target_method.dex_file,
                                                       info.target_method.dex_method_index));
  }
  for (const MethodPatchInfo<Label>& info : relative_call_patches_) {
    uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
    linker_patches->push_back(LinkerPatch::RelativeCodePatch(literal_offset,
                                                             info.target_method.dex_file,
                                                             info.target_method.dex_method_index));
  }
  for (const PcRelativeDexCacheAccessInfo& info : pc_relative_dex_cache_patches_) {
    uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
    linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(literal_offset,
                                                              &info.target_dex_file,
                                                              info.label.Position(),
                                                              info.element_offset));
  }
  for (const Label& label : simple_patches_) {
    uint32_t literal_offset = label.Position() - kLabelPositionToLiteralOffsetAdjustment;
    linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset));
  }
  for (const StringPatchInfo<Label>& info : string_patches_) {
    // These are always PC-relative, see GetSupportedLoadStringKind().
    uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
    linker_patches->push_back(LinkerPatch::RelativeStringPatch(literal_offset,
                                                               &info.dex_file,
                                                               info.label.Position(),
                                                               info.string_index));
  }
}

void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const {
  stream << Register(reg);
}

void CodeGeneratorX86_64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
  stream << FloatRegister(reg);
}

size_t CodeGeneratorX86_64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
  __ movq(Address(CpuRegister(RSP), stack_index), CpuRegister(reg_id));
  return kX86_64WordSize;
}

size_t CodeGeneratorX86_64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
  __ movq(CpuRegister(reg_id), Address(CpuRegister(RSP), stack_index));
  return kX86_64WordSize;
}

size_t CodeGeneratorX86_64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
  __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
  return kX86_64WordSize;
}

size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
  __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
  return kX86_64WordSize;
}

void CodeGeneratorX86_64::InvokeRuntime(QuickEntrypointEnum entrypoint,
                                        HInstruction* instruction,
                                        uint32_t dex_pc,
                                        SlowPathCode* slow_path) {
  InvokeRuntime(GetThreadOffset<kX86_64WordSize>(entrypoint).Int32Value(),
                instruction,
                dex_pc,
                slow_path);
}

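// Calls the runtime entrypoint stored at the given Thread offset (addressed through the gs
// segment register) and records the PC information needed for the call site's stack map.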
void CodeGeneratorX86_64::InvokeRuntime(int32_t entry_point_offset,
                                        HInstruction* instruction,
                                        uint32_t dex_pc,
                                        SlowPathCode* slow_path) {
  ValidateInvokeRuntime(instruction, slow_path);
  __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip */ true));
  RecordPcInfo(instruction, dex_pc, slow_path);
}

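// x86-64 does not use core register pairs: 64-bit values fit in a single register.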
static constexpr int kNumberOfCpuRegisterPairs = 0;
// Use a fake return address register to mimic Quick.
static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1);
CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
                                         const X86_64InstructionSetFeatures& isa_features,
                                         const CompilerOptions& compiler_options,
                                         OptimizingCompilerStats* stats)
      : CodeGenerator(graph,
                      kNumberOfCpuRegisters,
                      kNumberOfFloatRegisters,
                      kNumberOfCpuRegisterPairs,
                      ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
                                          arraysize(kCoreCalleeSaves))
                          | (1 << kFakeReturnRegister),
                      ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves),
                                          arraysize(kFpuCalleeSaves)),
                      compiler_options,
                      stats),
        block_labels_(nullptr),
        location_builder_(graph, this),
        instruction_visitor_(graph, this),
        move_resolver_(graph->GetArena(), this),
        assembler_(graph->GetArena()),
        isa_features_(isa_features),
        constant_area_start_(0),
        method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
        relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
        pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
        simple_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
        string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
        fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
  AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
}

InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph,
                                                               CodeGeneratorX86_64* codegen)
      : InstructionCodeGenerator(graph, codegen),
        assembler_(codegen->GetAssembler()),
        codegen_(codegen) {}

void CodeGeneratorX86_64::SetupBlockedRegisters() const {
  // Stack register is always reserved.
  blocked_core_registers_[RSP] = true;

  // Block the register used as TMP.
  blocked_core_registers_[TMP] = true;
}

static dwarf::Reg DWARFReg(Register reg) {
  return dwarf::Reg::X86_64Core(static_cast<int>(reg));
}

static dwarf::Reg DWARFReg(FloatRegister reg) {
  return dwarf::Reg::X86_64Fp(static_cast<int>(reg));
}

void CodeGeneratorX86_64::GenerateFrameEntry() {
  __ cfi().SetCurrentCFAOffset(kX86_64WordSize);  // return address
  __ Bind(&frame_entry_label_);
  bool skip_overflow_check = IsLeafMethod()
      && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
  DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());

  if (!skip_overflow_check) {
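    // Implicit stack overflow check: probe the address GetStackOverflowReservedBytes(kX86_64)
    // bytes below RSP; if the stack is about to overflow, the access faults in the protected
    // region and the runtime converts the fault into a StackOverflowError.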
    __ testq(CpuRegister(RAX), Address(
        CpuRegister(RSP), -static_cast<int32_t>(GetStackOverflowReservedBytes(kX86_64))));
    RecordPcInfo(nullptr, 0);
  }

  if (HasEmptyFrame()) {
    return;
  }

  for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
    Register reg = kCoreCalleeSaves[i];
    if (allocated_registers_.ContainsCoreRegister(reg)) {
      __ pushq(CpuRegister(reg));
      __ cfi().AdjustCFAOffset(kX86_64WordSize);
      __ cfi().RelOffset(DWARFReg(reg), 0);
    }
  }

  int adjust = GetFrameSize() - GetCoreSpillSize();
  __ subq(CpuRegister(RSP), Immediate(adjust));
  __ cfi().AdjustCFAOffset(adjust);
  uint32_t xmm_spill_location = GetFpuSpillStart();
  size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize();

  for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
    if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
      int offset = xmm_spill_location + (xmm_spill_slot_size * i);
      __ movsd(Address(CpuRegister(RSP), offset), XmmRegister(kFpuCalleeSaves[i]));
      __ cfi().RelOffset(DWARFReg(kFpuCalleeSaves[i]), offset);
    }
  }

  __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset),
          CpuRegister(kMethodRegisterArgument));
}

void CodeGeneratorX86_64::GenerateFrameExit() {
  __ cfi().RememberState();
  if (!HasEmptyFrame()) {
    uint32_t xmm_spill_location = GetFpuSpillStart();
    size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize();
    for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
      if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
        int offset = xmm_spill_location + (xmm_spill_slot_size * i);
        __ movsd(XmmRegister(kFpuCalleeSaves[i]), Address(CpuRegister(RSP), offset));
        __ cfi().Restore(DWARFReg(kFpuCalleeSaves[i]));
      }
    }

    int adjust = GetFrameSize() - GetCoreSpillSize();
    __ addq(CpuRegister(RSP), Immediate(adjust));
    __ cfi().AdjustCFAOffset(-adjust);

    for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
      Register reg = kCoreCalleeSaves[i];
      if (allocated_registers_.ContainsCoreRegister(reg)) {
        __ popq(CpuRegister(reg));
        __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize));
        __ cfi().Restore(DWARFReg(reg));
      }
    }
  }
  __ ret();
  __ cfi().RestoreState();
  __ cfi().DefCFAOffset(GetFrameSize());
}

void CodeGeneratorX86_64::Bind(HBasicBlock* block) {
  __ Bind(GetLabelOf(block));
}

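// Moves a value between two arbitrary locations (core/FP registers, stack slots or constants),
// using the scratch register TMP for memory-to-memory transfers.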
Move(Location destination,Location source)1123 void CodeGeneratorX86_64::Move(Location destination, Location source) {
1124   if (source.Equals(destination)) {
1125     return;
1126   }
1127   if (destination.IsRegister()) {
1128     CpuRegister dest = destination.AsRegister<CpuRegister>();
1129     if (source.IsRegister()) {
1130       __ movq(dest, source.AsRegister<CpuRegister>());
1131     } else if (source.IsFpuRegister()) {
1132       __ movd(dest, source.AsFpuRegister<XmmRegister>());
1133     } else if (source.IsStackSlot()) {
1134       __ movl(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1135     } else if (source.IsConstant()) {
1136       HConstant* constant = source.GetConstant();
1137       if (constant->IsLongConstant()) {
1138         Load64BitValue(dest, constant->AsLongConstant()->GetValue());
1139       } else {
1140         Load32BitValue(dest, GetInt32ValueOf(constant));
1141       }
1142     } else {
1143       DCHECK(source.IsDoubleStackSlot());
1144       __ movq(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1145     }
1146   } else if (destination.IsFpuRegister()) {
1147     XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
1148     if (source.IsRegister()) {
1149       __ movd(dest, source.AsRegister<CpuRegister>());
1150     } else if (source.IsFpuRegister()) {
1151       __ movaps(dest, source.AsFpuRegister<XmmRegister>());
1152     } else if (source.IsConstant()) {
1153       HConstant* constant = source.GetConstant();
1154       int64_t value = CodeGenerator::GetInt64ValueOf(constant);
1155       if (constant->IsFloatConstant()) {
1156         Load32BitValue(dest, static_cast<int32_t>(value));
1157       } else {
1158         Load64BitValue(dest, value);
1159       }
1160     } else if (source.IsStackSlot()) {
1161       __ movss(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1162     } else {
1163       DCHECK(source.IsDoubleStackSlot());
1164       __ movsd(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1165     }
1166   } else if (destination.IsStackSlot()) {
1167     if (source.IsRegister()) {
1168       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
1169               source.AsRegister<CpuRegister>());
1170     } else if (source.IsFpuRegister()) {
1171       __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
1172                source.AsFpuRegister<XmmRegister>());
1173     } else if (source.IsConstant()) {
1174       HConstant* constant = source.GetConstant();
1175       int32_t value = GetInt32ValueOf(constant);
1176       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
1177     } else {
1178       DCHECK(source.IsStackSlot()) << source;
1179       __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
1180       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
1181     }
1182   } else {
1183     DCHECK(destination.IsDoubleStackSlot());
1184     if (source.IsRegister()) {
1185       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
1186               source.AsRegister<CpuRegister>());
1187     } else if (source.IsFpuRegister()) {
1188       __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
1189                source.AsFpuRegister<XmmRegister>());
1190     } else if (source.IsConstant()) {
1191       HConstant* constant = source.GetConstant();
1192       int64_t value;
1193       if (constant->IsDoubleConstant()) {
1194         value = bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
1195       } else {
1196         DCHECK(constant->IsLongConstant());
1197         value = constant->AsLongConstant()->GetValue();
1198       }
1199       Store64BitValueToStack(destination, value);
1200     } else {
1201       DCHECK(source.IsDoubleStackSlot());
1202       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
1203       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
1204     }
1205   }
1206 }
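// Illustrative examples of what Move() above emits (sketch only; the chosen
// instruction depends solely on the source/destination location kinds):
//   register   <- stack slot (32-bit):  movl dst, [rsp + src_offset]
//   FP reg     <- double stack slot:    movsd xmm_dst, [rsp + src_offset]
//   stack slot <- stack slot (32-bit):  movl TMP, [rsp + src]; movl [rsp + dst], TMP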
1207 
1208 void CodeGeneratorX86_64::MoveConstant(Location location, int32_t value) {
1209   DCHECK(location.IsRegister());
1210   Load64BitValue(location.AsRegister<CpuRegister>(), static_cast<int64_t>(value));
1211 }
1212 
1213 void CodeGeneratorX86_64::MoveLocation(
1214     Location dst, Location src, Primitive::Type dst_type ATTRIBUTE_UNUSED) {
1215   Move(dst, src);
1216 }
1217 
1218 void CodeGeneratorX86_64::AddLocationAsTemp(Location location, LocationSummary* locations) {
1219   if (location.IsRegister()) {
1220     locations->AddTemp(location);
1221   } else {
1222     UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
1223   }
1224 }
1225 
1226 void InstructionCodeGeneratorX86_64::HandleGoto(HInstruction* got, HBasicBlock* successor) {
1227   DCHECK(!successor->IsExitBlock());
1228 
1229   HBasicBlock* block = got->GetBlock();
1230   HInstruction* previous = got->GetPrevious();
1231 
1232   HLoopInformation* info = block->GetLoopInformation();
1233   if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
1234     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
1235     return;
1236   }
1237 
1238   if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
1239     GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
1240   }
1241   if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
1242     __ jmp(codegen_->GetLabelOf(successor));
1243   }
1244 }
1245 
1246 void LocationsBuilderX86_64::VisitGoto(HGoto* got) {
1247   got->SetLocations(nullptr);
1248 }
1249 
1250 void InstructionCodeGeneratorX86_64::VisitGoto(HGoto* got) {
1251   HandleGoto(got, got->GetSuccessor());
1252 }
1253 
1254 void LocationsBuilderX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
1255   try_boundary->SetLocations(nullptr);
1256 }
1257 
1258 void InstructionCodeGeneratorX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
1259   HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
1260   if (!successor->IsExitBlock()) {
1261     HandleGoto(try_boundary, successor);
1262   }
1263 }
1264 
1265 void LocationsBuilderX86_64::VisitExit(HExit* exit) {
1266   exit->SetLocations(nullptr);
1267 }
1268 
1269 void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
1270 }
1271 
1272 template<class LabelType>
1273 void InstructionCodeGeneratorX86_64::GenerateFPJumps(HCondition* cond,
1274                                                      LabelType* true_label,
1275                                                      LabelType* false_label) {
1276   if (cond->IsFPConditionTrueIfNaN()) {
1277     __ j(kUnordered, true_label);
1278   } else if (cond->IsFPConditionFalseIfNaN()) {
1279     __ j(kUnordered, false_label);
1280   }
1281   __ j(X86_64FPCondition(cond->GetCondition()), true_label);
1282 }
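// Background for the NaN handling above: ucomiss/ucomisd signal an unordered
// comparison (either operand NaN) by setting ZF, PF and CF, so the kUnordered
// jump (jp) takes the NaN path first. As an illustrative sketch, a float `<`
// condition with false-if-NaN semantics lowers to:
//   ucomiss xmm_lhs, xmm_rhs
//   jp   false_label      // NaN => condition is false
//   jb   true_label       // CF set => lhs < rhs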
1283 
1284 void InstructionCodeGeneratorX86_64::GenerateCompareTest(HCondition* condition) {
1285   LocationSummary* locations = condition->GetLocations();
1286 
1287   Location left = locations->InAt(0);
1288   Location right = locations->InAt(1);
1289   Primitive::Type type = condition->InputAt(0)->GetType();
1290   switch (type) {
1291     case Primitive::kPrimBoolean:
1292     case Primitive::kPrimByte:
1293     case Primitive::kPrimChar:
1294     case Primitive::kPrimShort:
1295     case Primitive::kPrimInt:
1296     case Primitive::kPrimNot: {
1297       CpuRegister left_reg = left.AsRegister<CpuRegister>();
1298       if (right.IsConstant()) {
1299         int32_t value = CodeGenerator::GetInt32ValueOf(right.GetConstant());
1300         if (value == 0) {
1301           __ testl(left_reg, left_reg);
1302         } else {
1303           __ cmpl(left_reg, Immediate(value));
1304         }
1305       } else if (right.IsStackSlot()) {
1306         __ cmpl(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
1307       } else {
1308         __ cmpl(left_reg, right.AsRegister<CpuRegister>());
1309       }
1310       break;
1311     }
1312     case Primitive::kPrimLong: {
1313       CpuRegister left_reg = left.AsRegister<CpuRegister>();
1314       if (right.IsConstant()) {
1315         int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
1316         codegen_->Compare64BitValue(left_reg, value);
1317       } else if (right.IsDoubleStackSlot()) {
1318         __ cmpq(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
1319       } else {
1320         __ cmpq(left_reg, right.AsRegister<CpuRegister>());
1321       }
1322       break;
1323     }
1324     case Primitive::kPrimFloat: {
1325       if (right.IsFpuRegister()) {
1326         __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
1327       } else if (right.IsConstant()) {
1328         __ ucomiss(left.AsFpuRegister<XmmRegister>(),
1329                    codegen_->LiteralFloatAddress(
1330                      right.GetConstant()->AsFloatConstant()->GetValue()));
1331       } else {
1332         DCHECK(right.IsStackSlot());
1333         __ ucomiss(left.AsFpuRegister<XmmRegister>(),
1334                    Address(CpuRegister(RSP), right.GetStackIndex()));
1335       }
1336       break;
1337     }
1338     case Primitive::kPrimDouble: {
1339       if (right.IsFpuRegister()) {
1340         __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
1341       } else if (right.IsConstant()) {
1342         __ ucomisd(left.AsFpuRegister<XmmRegister>(),
1343                    codegen_->LiteralDoubleAddress(
1344                      right.GetConstant()->AsDoubleConstant()->GetValue()));
1345       } else {
1346         DCHECK(right.IsDoubleStackSlot());
1347         __ ucomisd(left.AsFpuRegister<XmmRegister>(),
1348                    Address(CpuRegister(RSP), right.GetStackIndex()));
1349       }
1350       break;
1351     }
1352     default:
1353       LOG(FATAL) << "Unexpected condition type " << type;
1354   }
1355 }
1356 
1357 template<class LabelType>
1358 void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* condition,
1359                                                                   LabelType* true_target_in,
1360                                                                   LabelType* false_target_in) {
1361   // Generated branching requires both targets to be explicit. If either of the
1362   // targets is nullptr (fallthrough), use and bind `fallthrough_target` instead.
1363   LabelType fallthrough_target;
1364   LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
1365   LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
1366 
1367   // Generate the comparison to set the CC.
1368   GenerateCompareTest(condition);
1369 
1370   // Now generate the correct jump(s).
1371   Primitive::Type type = condition->InputAt(0)->GetType();
1372   switch (type) {
1373     case Primitive::kPrimLong: {
1374       __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
1375       break;
1376     }
1377     case Primitive::kPrimFloat: {
1378       GenerateFPJumps(condition, true_target, false_target);
1379       break;
1380     }
1381     case Primitive::kPrimDouble: {
1382       GenerateFPJumps(condition, true_target, false_target);
1383       break;
1384     }
1385     default:
1386       LOG(FATAL) << "Unexpected condition type " << type;
1387   }
1388 
1389   if (false_target != &fallthrough_target) {
1390     __ jmp(false_target);
1391   }
1392 
1393   if (fallthrough_target.IsLinked()) {
1394     __ Bind(&fallthrough_target);
1395   }
1396 }
1397 
1398 static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) {
1399   // Moves may affect the eflags register (move zero uses xorl), so the EFLAGS
1400   // are set only strictly before `branch`. We can't use the eflags on long
1401   // conditions if they are materialized due to the complex branching.
1402   return cond->IsCondition() &&
1403          cond->GetNext() == branch &&
1404          !Primitive::IsFloatingPointType(cond->InputAt(0)->GetType());
1405 }
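// Example of the reuse this enables: when a materialized integer HCondition is
// immediately followed by the HIf that consumes it, the cmpl/setcc pair emitted
// for the condition leaves EFLAGS describing the comparison (setcc does not
// clobber flags), so GenerateTestAndBranch can emit a single jcc instead of
// re-testing the materialized boolean.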
1406 
1407 template<class LabelType>
1408 void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruction,
1409                                                            size_t condition_input_index,
1410                                                            LabelType* true_target,
1411                                                            LabelType* false_target) {
1412   HInstruction* cond = instruction->InputAt(condition_input_index);
1413 
1414   if (true_target == nullptr && false_target == nullptr) {
1415     // Nothing to do. The code always falls through.
1416     return;
1417   } else if (cond->IsIntConstant()) {
1418     // Constant condition, statically compared against "true" (integer value 1).
1419     if (cond->AsIntConstant()->IsTrue()) {
1420       if (true_target != nullptr) {
1421         __ jmp(true_target);
1422       }
1423     } else {
1424       DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
1425       if (false_target != nullptr) {
1426         __ jmp(false_target);
1427       }
1428     }
1429     return;
1430   }
1431 
1432   // The following code generates these patterns:
1433   //  (1) true_target == nullptr && false_target != nullptr
1434   //        - opposite condition true => branch to false_target
1435   //  (2) true_target != nullptr && false_target == nullptr
1436   //        - condition true => branch to true_target
1437   //  (3) true_target != nullptr && false_target != nullptr
1438   //        - condition true => branch to true_target
1439   //        - branch to false_target
1440   if (IsBooleanValueOrMaterializedCondition(cond)) {
1441     if (AreEflagsSetFrom(cond, instruction)) {
1442       if (true_target == nullptr) {
1443         __ j(X86_64IntegerCondition(cond->AsCondition()->GetOppositeCondition()), false_target);
1444       } else {
1445         __ j(X86_64IntegerCondition(cond->AsCondition()->GetCondition()), true_target);
1446       }
1447     } else {
1448       // Materialized condition, compare against 0.
1449       Location lhs = instruction->GetLocations()->InAt(condition_input_index);
1450       if (lhs.IsRegister()) {
1451         __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
1452       } else {
1453         __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0));
1454       }
1455       if (true_target == nullptr) {
1456         __ j(kEqual, false_target);
1457       } else {
1458         __ j(kNotEqual, true_target);
1459       }
1460     }
1461   } else {
1462     // Condition has not been materialized, use its inputs as the
1463     // comparison and its condition as the branch condition.
1464     HCondition* condition = cond->AsCondition();
1465 
1466     // If this is a long or FP comparison that has been folded into
1467     // the HCondition, generate the comparison directly.
1468     Primitive::Type type = condition->InputAt(0)->GetType();
1469     if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) {
1470       GenerateCompareTestAndBranch(condition, true_target, false_target);
1471       return;
1472     }
1473 
1474     Location lhs = condition->GetLocations()->InAt(0);
1475     Location rhs = condition->GetLocations()->InAt(1);
1476     if (rhs.IsRegister()) {
1477       __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
1478     } else if (rhs.IsConstant()) {
1479       int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
1480       codegen_->Compare32BitValue(lhs.AsRegister<CpuRegister>(), constant);
1481     } else {
1482       __ cmpl(lhs.AsRegister<CpuRegister>(),
1483               Address(CpuRegister(RSP), rhs.GetStackIndex()));
1484     }
1485     if (true_target == nullptr) {
1486       __ j(X86_64IntegerCondition(condition->GetOppositeCondition()), false_target);
1487     } else {
1488       __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
1489     }
1490   }
1491 
1492   // If neither branch falls through (case 3), the conditional branch to `true_target`
1493   // was already emitted (case 2) and we need to emit a jump to `false_target`.
1494   if (true_target != nullptr && false_target != nullptr) {
1495     __ jmp(false_target);
1496   }
1497 }
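// Illustrative lowering (sketch only): for `if (a < b)` where the true
// successor is the fall-through block (pattern (1) above), only the opposite
// condition is emitted:
//   cmpl a, b
//   jge  false_target
// Pattern (3) emits the jcc to true_target and then an unconditional
// jmp false_target.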
1498 
1499 void LocationsBuilderX86_64::VisitIf(HIf* if_instr) {
1500   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
1501   if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
1502     locations->SetInAt(0, Location::Any());
1503   }
1504 }
1505 
1506 void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
1507   HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
1508   HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
1509   Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
1510       nullptr : codegen_->GetLabelOf(true_successor);
1511   Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
1512       nullptr : codegen_->GetLabelOf(false_successor);
1513   GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
1514 }
1515 
1516 void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
1517   LocationSummary* locations = new (GetGraph()->GetArena())
1518       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
1519   if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
1520     locations->SetInAt(0, Location::Any());
1521   }
1522 }
1523 
1524 void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
1525   SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86_64>(deoptimize);
1526   GenerateTestAndBranch<Label>(deoptimize,
1527                                /* condition_input_index */ 0,
1528                                slow_path->GetEntryLabel(),
1529                                /* false_target */ nullptr);
1530 }
1531 
1532 static bool SelectCanUseCMOV(HSelect* select) {
1533   // There are no conditional move instructions for XMMs.
1534   if (Primitive::IsFloatingPointType(select->GetType())) {
1535     return false;
1536   }
1537 
1538   // A FP condition doesn't generate the single CC that we need.
1539   HInstruction* condition = select->GetCondition();
1540   if (condition->IsCondition() &&
1541       Primitive::IsFloatingPointType(condition->InputAt(0)->GetType())) {
1542     return false;
1543   }
1544 
1545   // We can generate a CMOV for this Select.
1546   return true;
1547 }
1548 
1549 void LocationsBuilderX86_64::VisitSelect(HSelect* select) {
1550   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
1551   if (Primitive::IsFloatingPointType(select->GetType())) {
1552     locations->SetInAt(0, Location::RequiresFpuRegister());
1553     locations->SetInAt(1, Location::Any());
1554   } else {
1555     locations->SetInAt(0, Location::RequiresRegister());
1556     if (SelectCanUseCMOV(select)) {
1557       if (select->InputAt(1)->IsConstant()) {
1558         locations->SetInAt(1, Location::RequiresRegister());
1559       } else {
1560         locations->SetInAt(1, Location::Any());
1561       }
1562     } else {
1563       locations->SetInAt(1, Location::Any());
1564     }
1565   }
1566   if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
1567     locations->SetInAt(2, Location::RequiresRegister());
1568   }
1569   locations->SetOut(Location::SameAsFirstInput());
1570 }
1571 
1572 void InstructionCodeGeneratorX86_64::VisitSelect(HSelect* select) {
1573   LocationSummary* locations = select->GetLocations();
1574   if (SelectCanUseCMOV(select)) {
1575     // If both the condition and the source types are integer, we can generate
1576     // a CMOV to implement Select.
1577     CpuRegister value_false = locations->InAt(0).AsRegister<CpuRegister>();
1578     Location value_true_loc = locations->InAt(1);
1579     DCHECK(locations->InAt(0).Equals(locations->Out()));
1580 
1581     HInstruction* select_condition = select->GetCondition();
1582     Condition cond = kNotEqual;
1583 
1584     // Figure out how to test the 'condition'.
1585     if (select_condition->IsCondition()) {
1586       HCondition* condition = select_condition->AsCondition();
1587       if (!condition->IsEmittedAtUseSite()) {
1588         // This was a previously materialized condition.
1589         // Can we use the existing condition code?
1590         if (AreEflagsSetFrom(condition, select)) {
1591           // Materialization was the previous instruction.  Condition codes are right.
1592           cond = X86_64IntegerCondition(condition->GetCondition());
1593         } else {
1594           // No, we have to recreate the condition code.
1595           CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
1596           __ testl(cond_reg, cond_reg);
1597         }
1598       } else {
1599         GenerateCompareTest(condition);
1600         cond = X86_64IntegerCondition(condition->GetCondition());
1601       }
1602     } else {
1603       // Must be a boolean condition, which needs to be compared to 0.
1604       CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
1605       __ testl(cond_reg, cond_reg);
1606     }
1607 
1608     // If the condition is true, overwrite the output, which already contains false.
1609     // Generate the correct sized CMOV.
1610     bool is_64_bit = Primitive::Is64BitType(select->GetType());
1611     if (value_true_loc.IsRegister()) {
1612       __ cmov(cond, value_false, value_true_loc.AsRegister<CpuRegister>(), is_64_bit);
1613     } else {
1614       __ cmov(cond,
1615               value_false,
1616               Address(CpuRegister(RSP), value_true_loc.GetStackIndex()), is_64_bit);
1617     }
1618   } else {
1619     NearLabel false_target;
1620     GenerateTestAndBranch<NearLabel>(select,
1621                                      /* condition_input_index */ 2,
1622                                      /* true_target */ nullptr,
1623                                      &false_target);
1624     codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
1625     __ Bind(&false_target);
1626   }
1627 }
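// Illustrative CMOV lowering (sketch only), for a Select whose condition is a
// materialized boolean held in a register:
//   testl  cond, cond          // sets ZF from the boolean
//   cmovnzl out, value_true    // out already holds value_false
// The 64-bit case uses cmovnzq; FP selects take the branch-and-move path in
// the else-branch above.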
1628 
1629 void LocationsBuilderX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
1630   new (GetGraph()->GetArena()) LocationSummary(info);
1631 }
1632 
1633 void InstructionCodeGeneratorX86_64::VisitNativeDebugInfo(HNativeDebugInfo*) {
1634   // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
1635 }
1636 
1637 void CodeGeneratorX86_64::GenerateNop() {
1638   __ nop();
1639 }
1640 
1641 void LocationsBuilderX86_64::HandleCondition(HCondition* cond) {
1642   LocationSummary* locations =
1643       new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall);
1644   // Handle the long/FP comparisons made in instruction simplification.
1645   switch (cond->InputAt(0)->GetType()) {
1646     case Primitive::kPrimLong:
1647       locations->SetInAt(0, Location::RequiresRegister());
1648       locations->SetInAt(1, Location::Any());
1649       break;
1650     case Primitive::kPrimFloat:
1651     case Primitive::kPrimDouble:
1652       locations->SetInAt(0, Location::RequiresFpuRegister());
1653       locations->SetInAt(1, Location::Any());
1654       break;
1655     default:
1656       locations->SetInAt(0, Location::RequiresRegister());
1657       locations->SetInAt(1, Location::Any());
1658       break;
1659   }
1660   if (!cond->IsEmittedAtUseSite()) {
1661     locations->SetOut(Location::RequiresRegister());
1662   }
1663 }
1664 
1665 void InstructionCodeGeneratorX86_64::HandleCondition(HCondition* cond) {
1666   if (cond->IsEmittedAtUseSite()) {
1667     return;
1668   }
1669 
1670   LocationSummary* locations = cond->GetLocations();
1671   Location lhs = locations->InAt(0);
1672   Location rhs = locations->InAt(1);
1673   CpuRegister reg = locations->Out().AsRegister<CpuRegister>();
1674   NearLabel true_label, false_label;
1675 
1676   switch (cond->InputAt(0)->GetType()) {
1677     default:
1678       // Integer case.
1679 
1680       // Clear output register: setcc only sets the low byte.
1681       __ xorl(reg, reg);
1682 
1683       if (rhs.IsRegister()) {
1684         __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
1685       } else if (rhs.IsConstant()) {
1686         int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
1687         codegen_->Compare32BitValue(lhs.AsRegister<CpuRegister>(), constant);
1688       } else {
1689         __ cmpl(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex()));
1690       }
1691       __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
1692       return;
1693     case Primitive::kPrimLong:
1694       // Clear output register: setcc only sets the low byte.
1695       __ xorl(reg, reg);
1696 
1697       if (rhs.IsRegister()) {
1698         __ cmpq(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
1699       } else if (rhs.IsConstant()) {
1700         int64_t value = rhs.GetConstant()->AsLongConstant()->GetValue();
1701         codegen_->Compare64BitValue(lhs.AsRegister<CpuRegister>(), value);
1702       } else {
1703         __ cmpq(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex()));
1704       }
1705       __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
1706       return;
1707     case Primitive::kPrimFloat: {
1708       XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
1709       if (rhs.IsConstant()) {
1710         float value = rhs.GetConstant()->AsFloatConstant()->GetValue();
1711         __ ucomiss(lhs_reg, codegen_->LiteralFloatAddress(value));
1712       } else if (rhs.IsStackSlot()) {
1713         __ ucomiss(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
1714       } else {
1715         __ ucomiss(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
1716       }
1717       GenerateFPJumps(cond, &true_label, &false_label);
1718       break;
1719     }
1720     case Primitive::kPrimDouble: {
1721       XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
1722       if (rhs.IsConstant()) {
1723         double value = rhs.GetConstant()->AsDoubleConstant()->GetValue();
1724         __ ucomisd(lhs_reg, codegen_->LiteralDoubleAddress(value));
1725       } else if (rhs.IsDoubleStackSlot()) {
1726         __ ucomisd(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
1727       } else {
1728         __ ucomisd(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
1729       }
1730       GenerateFPJumps(cond, &true_label, &false_label);
1731       break;
1732     }
1733   }
1734 
1735   // Convert the jumps into the result.
1736   NearLabel done_label;
1737 
1738   // False case: result = 0.
1739   __ Bind(&false_label);
1740   __ xorl(reg, reg);
1741   __ jmp(&done_label);
1742 
1743   // True case: result = 1.
1744   __ Bind(&true_label);
1745   __ movl(reg, Immediate(1));
1746   __ Bind(&done_label);
1747 }
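// Illustrative materialization of an int `a < b` condition (sketch only):
//   xorl  out, out       // clear first: setcc writes only the low byte
//   cmpl  a, b
//   setl  out
// Long conditions use cmpq instead; FP conditions are materialized through
// the jump-based sequence above so the NaN cases can be folded in.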
1748 
1749 void LocationsBuilderX86_64::VisitEqual(HEqual* comp) {
1750   HandleCondition(comp);
1751 }
1752 
1753 void InstructionCodeGeneratorX86_64::VisitEqual(HEqual* comp) {
1754   HandleCondition(comp);
1755 }
1756 
1757 void LocationsBuilderX86_64::VisitNotEqual(HNotEqual* comp) {
1758   HandleCondition(comp);
1759 }
1760 
1761 void InstructionCodeGeneratorX86_64::VisitNotEqual(HNotEqual* comp) {
1762   HandleCondition(comp);
1763 }
1764 
1765 void LocationsBuilderX86_64::VisitLessThan(HLessThan* comp) {
1766   HandleCondition(comp);
1767 }
1768 
1769 void InstructionCodeGeneratorX86_64::VisitLessThan(HLessThan* comp) {
1770   HandleCondition(comp);
1771 }
1772 
1773 void LocationsBuilderX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
1774   HandleCondition(comp);
1775 }
1776 
1777 void InstructionCodeGeneratorX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
1778   HandleCondition(comp);
1779 }
1780 
1781 void LocationsBuilderX86_64::VisitGreaterThan(HGreaterThan* comp) {
1782   HandleCondition(comp);
1783 }
1784 
1785 void InstructionCodeGeneratorX86_64::VisitGreaterThan(HGreaterThan* comp) {
1786   HandleCondition(comp);
1787 }
1788 
1789 void LocationsBuilderX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
1790   HandleCondition(comp);
1791 }
1792 
1793 void InstructionCodeGeneratorX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
1794   HandleCondition(comp);
1795 }
1796 
1797 void LocationsBuilderX86_64::VisitBelow(HBelow* comp) {
1798   HandleCondition(comp);
1799 }
1800 
1801 void InstructionCodeGeneratorX86_64::VisitBelow(HBelow* comp) {
1802   HandleCondition(comp);
1803 }
1804 
1805 void LocationsBuilderX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
1806   HandleCondition(comp);
1807 }
1808 
1809 void InstructionCodeGeneratorX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
1810   HandleCondition(comp);
1811 }
1812 
1813 void LocationsBuilderX86_64::VisitAbove(HAbove* comp) {
1814   HandleCondition(comp);
1815 }
1816 
1817 void InstructionCodeGeneratorX86_64::VisitAbove(HAbove* comp) {
1818   HandleCondition(comp);
1819 }
1820 
1821 void LocationsBuilderX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
1822   HandleCondition(comp);
1823 }
1824 
1825 void InstructionCodeGeneratorX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
1826   HandleCondition(comp);
1827 }
1828 
1829 void LocationsBuilderX86_64::VisitCompare(HCompare* compare) {
1830   LocationSummary* locations =
1831       new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
1832   switch (compare->InputAt(0)->GetType()) {
1833     case Primitive::kPrimBoolean:
1834     case Primitive::kPrimByte:
1835     case Primitive::kPrimShort:
1836     case Primitive::kPrimChar:
1837     case Primitive::kPrimInt:
1838     case Primitive::kPrimLong: {
1839       locations->SetInAt(0, Location::RequiresRegister());
1840       locations->SetInAt(1, Location::Any());
1841       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
1842       break;
1843     }
1844     case Primitive::kPrimFloat:
1845     case Primitive::kPrimDouble: {
1846       locations->SetInAt(0, Location::RequiresFpuRegister());
1847       locations->SetInAt(1, Location::Any());
1848       locations->SetOut(Location::RequiresRegister());
1849       break;
1850     }
1851     default:
1852       LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
1853   }
1854 }
1855 
1856 void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) {
1857   LocationSummary* locations = compare->GetLocations();
1858   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
1859   Location left = locations->InAt(0);
1860   Location right = locations->InAt(1);
1861 
1862   NearLabel less, greater, done;
1863   Primitive::Type type = compare->InputAt(0)->GetType();
1864   Condition less_cond = kLess;
1865 
1866   switch (type) {
1867     case Primitive::kPrimBoolean:
1868     case Primitive::kPrimByte:
1869     case Primitive::kPrimShort:
1870     case Primitive::kPrimChar:
1871     case Primitive::kPrimInt: {
1872       CpuRegister left_reg = left.AsRegister<CpuRegister>();
1873       if (right.IsConstant()) {
1874         int32_t value = right.GetConstant()->AsIntConstant()->GetValue();
1875         codegen_->Compare32BitValue(left_reg, value);
1876       } else if (right.IsStackSlot()) {
1877         __ cmpl(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
1878       } else {
1879         __ cmpl(left_reg, right.AsRegister<CpuRegister>());
1880       }
1881       break;
1882     }
1883     case Primitive::kPrimLong: {
1884       CpuRegister left_reg = left.AsRegister<CpuRegister>();
1885       if (right.IsConstant()) {
1886         int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
1887         codegen_->Compare64BitValue(left_reg, value);
1888       } else if (right.IsDoubleStackSlot()) {
1889         __ cmpq(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
1890       } else {
1891         __ cmpq(left_reg, right.AsRegister<CpuRegister>());
1892       }
1893       break;
1894     }
1895     case Primitive::kPrimFloat: {
1896       XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
1897       if (right.IsConstant()) {
1898         float value = right.GetConstant()->AsFloatConstant()->GetValue();
1899         __ ucomiss(left_reg, codegen_->LiteralFloatAddress(value));
1900       } else if (right.IsStackSlot()) {
1901         __ ucomiss(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
1902       } else {
1903         __ ucomiss(left_reg, right.AsFpuRegister<XmmRegister>());
1904       }
1905       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
1906       less_cond = kBelow;  //  ucomis{s,d} sets CF
1907       break;
1908     }
1909     case Primitive::kPrimDouble: {
1910       XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
1911       if (right.IsConstant()) {
1912         double value = right.GetConstant()->AsDoubleConstant()->GetValue();
1913         __ ucomisd(left_reg, codegen_->LiteralDoubleAddress(value));
1914       } else if (right.IsDoubleStackSlot()) {
1915         __ ucomisd(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
1916       } else {
1917         __ ucomisd(left_reg, right.AsFpuRegister<XmmRegister>());
1918       }
1919       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
1920       less_cond = kBelow;  //  ucomis{s,d} sets CF
1921       break;
1922     }
1923     default:
1924       LOG(FATAL) << "Unexpected compare type " << type;
1925   }
1926 
1927   __ movl(out, Immediate(0));
1928   __ j(kEqual, &done);
1929   __ j(less_cond, &less);
1930 
1931   __ Bind(&greater);
1932   __ movl(out, Immediate(1));
1933   __ jmp(&done);
1934 
1935   __ Bind(&less);
1936   __ movl(out, Immediate(-1));
1937 
1938   __ Bind(&done);
1939 }
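// Illustrative result materialization for a long compare (sketch only):
//   cmpq  left, right
//   movl  out, 0
//   je    done
//   jl    less            // kBelow instead for float/double (ucomis sets CF)
//   movl  out, 1          // greater
//   jmp   done
// less:
//   movl  out, -1
// done: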
1940 
1941 void LocationsBuilderX86_64::VisitIntConstant(HIntConstant* constant) {
1942   LocationSummary* locations =
1943       new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
1944   locations->SetOut(Location::ConstantLocation(constant));
1945 }
1946 
1947 void InstructionCodeGeneratorX86_64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
1948   // Will be generated at use site.
1949 }
1950 
1951 void LocationsBuilderX86_64::VisitNullConstant(HNullConstant* constant) {
1952   LocationSummary* locations =
1953       new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
1954   locations->SetOut(Location::ConstantLocation(constant));
1955 }
1956 
1957 void InstructionCodeGeneratorX86_64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
1958   // Will be generated at use site.
1959 }
1960 
1961 void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) {
1962   LocationSummary* locations =
1963       new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
1964   locations->SetOut(Location::ConstantLocation(constant));
1965 }
1966 
1967 void InstructionCodeGeneratorX86_64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
1968   // Will be generated at use site.
1969 }
1970 
1971 void LocationsBuilderX86_64::VisitFloatConstant(HFloatConstant* constant) {
1972   LocationSummary* locations =
1973       new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
1974   locations->SetOut(Location::ConstantLocation(constant));
1975 }
1976 
1977 void InstructionCodeGeneratorX86_64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
1978   // Will be generated at use site.
1979 }
1980 
1981 void LocationsBuilderX86_64::VisitDoubleConstant(HDoubleConstant* constant) {
1982   LocationSummary* locations =
1983       new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
1984   locations->SetOut(Location::ConstantLocation(constant));
1985 }
1986 
1987 void InstructionCodeGeneratorX86_64::VisitDoubleConstant(
1988     HDoubleConstant* constant ATTRIBUTE_UNUSED) {
1989   // Will be generated at use site.
1990 }
1991 
1992 void LocationsBuilderX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
1993   memory_barrier->SetLocations(nullptr);
1994 }
1995 
1996 void InstructionCodeGeneratorX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
1997   codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
1998 }
1999 
2000 void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) {
2001   ret->SetLocations(nullptr);
2002 }
2003 
2004 void InstructionCodeGeneratorX86_64::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) {
2005   codegen_->GenerateFrameExit();
2006 }
2007 
2008 void LocationsBuilderX86_64::VisitReturn(HReturn* ret) {
2009   LocationSummary* locations =
2010       new (GetGraph()->GetArena()) LocationSummary(ret, LocationSummary::kNoCall);
2011   switch (ret->InputAt(0)->GetType()) {
2012     case Primitive::kPrimBoolean:
2013     case Primitive::kPrimByte:
2014     case Primitive::kPrimChar:
2015     case Primitive::kPrimShort:
2016     case Primitive::kPrimInt:
2017     case Primitive::kPrimNot:
2018     case Primitive::kPrimLong:
2019       locations->SetInAt(0, Location::RegisterLocation(RAX));
2020       break;
2021 
2022     case Primitive::kPrimFloat:
2023     case Primitive::kPrimDouble:
2024       locations->SetInAt(0, Location::FpuRegisterLocation(XMM0));
2025       break;
2026 
2027     default:
2028       LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
2029   }
2030 }
2031 
2032 void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) {
2033   if (kIsDebugBuild) {
2034     switch (ret->InputAt(0)->GetType()) {
2035       case Primitive::kPrimBoolean:
2036       case Primitive::kPrimByte:
2037       case Primitive::kPrimChar:
2038       case Primitive::kPrimShort:
2039       case Primitive::kPrimInt:
2040       case Primitive::kPrimNot:
2041       case Primitive::kPrimLong:
2042         DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<CpuRegister>().AsRegister(), RAX);
2043         break;
2044 
2045       case Primitive::kPrimFloat:
2046       case Primitive::kPrimDouble:
2047         DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(),
2048                   XMM0);
2049         break;
2050 
2051       default:
2052         LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
2053     }
2054   }
2055   codegen_->GenerateFrameExit();
2056 }
2057 
2058 Location InvokeDexCallingConventionVisitorX86_64::GetReturnLocation(Primitive::Type type) const {
2059   switch (type) {
2060     case Primitive::kPrimBoolean:
2061     case Primitive::kPrimByte:
2062     case Primitive::kPrimChar:
2063     case Primitive::kPrimShort:
2064     case Primitive::kPrimInt:
2065     case Primitive::kPrimNot:
2066     case Primitive::kPrimLong:
2067       return Location::RegisterLocation(RAX);
2068 
2069     case Primitive::kPrimVoid:
2070       return Location::NoLocation();
2071 
2072     case Primitive::kPrimDouble:
2073     case Primitive::kPrimFloat:
2074       return Location::FpuRegisterLocation(XMM0);
2075   }
2076 
2077   UNREACHABLE();
2078 }
2079 
2080 Location InvokeDexCallingConventionVisitorX86_64::GetMethodLocation() const {
2081   return Location::RegisterLocation(kMethodRegisterArgument);
2082 }
2083 
2084 Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(Primitive::Type type) {
2085   switch (type) {
2086     case Primitive::kPrimBoolean:
2087     case Primitive::kPrimByte:
2088     case Primitive::kPrimChar:
2089     case Primitive::kPrimShort:
2090     case Primitive::kPrimInt:
2091     case Primitive::kPrimNot: {
2092       uint32_t index = gp_index_++;
2093       stack_index_++;
2094       if (index < calling_convention.GetNumberOfRegisters()) {
2095         return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
2096       } else {
2097         return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
2098       }
2099     }
2100 
2101     case Primitive::kPrimLong: {
2102       uint32_t index = gp_index_;
2103       stack_index_ += 2;
2104       if (index < calling_convention.GetNumberOfRegisters()) {
2105         gp_index_ += 1;
2106         return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
2107       } else {
2108         gp_index_ += 2;
2109         return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
2110       }
2111     }
2112 
2113     case Primitive::kPrimFloat: {
2114       uint32_t index = float_index_++;
2115       stack_index_++;
2116       if (index < calling_convention.GetNumberOfFpuRegisters()) {
2117         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
2118       } else {
2119         return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
2120       }
2121     }
2122 
2123     case Primitive::kPrimDouble: {
2124       uint32_t index = float_index_++;
2125       stack_index_ += 2;
2126       if (index < calling_convention.GetNumberOfFpuRegisters()) {
2127         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
2128       } else {
2129         return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
2130       }
2131     }
2132 
2133     case Primitive::kPrimVoid:
2134       LOG(FATAL) << "Unexpected parameter type " << type;
2135       break;
2136   }
2137   return Location::NoLocation();
2138 }
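// Illustrative bookkeeping (sketch only): for a signature (int, long, float),
// the visitor hands out GP register 0 to the int, GP register 1 to the long
// and FP register 0 to the float, while stack_index_ advances by 1, 2 and 1
// slots respectively, so that any later argument that overflows the registers
// lands at the correct caller-frame offset.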
2139 
2140 void LocationsBuilderX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2141   // The trampoline uses the same calling convention as a normal dex call,
2142   // except that instead of loading arg0/r0 with the target Method*, arg0/r0
2143   // will contain the method_idx.
2144   HandleInvoke(invoke);
2145 }
2146 
2147 void InstructionCodeGeneratorX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2148   codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
2149 }
2150 
2151 void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2152   // Explicit clinit checks triggered by static invokes must have been pruned by
2153   // art::PrepareForRegisterAllocation.
2154   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2155 
2156   IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
2157   if (intrinsic.TryDispatch(invoke)) {
2158     return;
2159   }
2160 
2161   HandleInvoke(invoke);
2162 }
2163 
2164 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
2165   if (invoke->GetLocations()->Intrinsified()) {
2166     IntrinsicCodeGeneratorX86_64 intrinsic(codegen);
2167     intrinsic.Dispatch(invoke);
2168     return true;
2169   }
2170   return false;
2171 }
2172 
2173 void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2174   // Explicit clinit checks triggered by static invokes must have been pruned by
2175   // art::PrepareForRegisterAllocation.
2176   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2177 
2178   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2179     return;
2180   }
2181 
2182   LocationSummary* locations = invoke->GetLocations();
2183   codegen_->GenerateStaticOrDirectCall(
2184       invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
2185   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2186 }
2187 
2188 void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) {
2189   InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
2190   CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
2191 }
2192 
2193 void LocationsBuilderX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2194   IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
2195   if (intrinsic.TryDispatch(invoke)) {
2196     return;
2197   }
2198 
2199   HandleInvoke(invoke);
2200 }
2201 
2202 void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2203   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2204     return;
2205   }
2206 
2207   codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
2208   DCHECK(!codegen_->IsLeafMethod());
2209   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2210 }
2211 
2212 void LocationsBuilderX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
2213   HandleInvoke(invoke);
2214   // Add the hidden argument.
2215   invoke->GetLocations()->AddTemp(Location::RegisterLocation(RAX));
2216 }
2217 
2218 void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
2219   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
2220   LocationSummary* locations = invoke->GetLocations();
2221   CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
2222   CpuRegister hidden_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
2223   uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
2224       invoke->GetImtIndex() % mirror::Class::kImtSize, kX86_64PointerSize).Uint32Value();
2225   Location receiver = locations->InAt(0);
2226   size_t class_offset = mirror::Object::ClassOffset().SizeValue();
2227 
2228   // Set the hidden argument. This is safe to do here, as RAX
2229   // won't be modified thereafter, before the `call` instruction.
2230   DCHECK_EQ(RAX, hidden_reg.AsRegister());
2231   codegen_->Load64BitValue(hidden_reg, invoke->GetDexMethodIndex());
2232 
2233   if (receiver.IsStackSlot()) {
2234     __ movl(temp, Address(CpuRegister(RSP), receiver.GetStackIndex()));
2235     // /* HeapReference<Class> */ temp = temp->klass_
2236     __ movl(temp, Address(temp, class_offset));
2237   } else {
2238     // /* HeapReference<Class> */ temp = receiver->klass_
2239     __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset));
2240   }
2241   codegen_->MaybeRecordImplicitNullCheck(invoke);
2242   // Instead of simply (possibly) unpoisoning `temp` here, we should
2243   // emit a read barrier for the previous class reference load.
2244   // However, this is not required in practice, as this is an
2245   // intermediate/temporary reference and because the current
2246   // concurrent copying collector keeps the from-space memory
2247   // intact/accessible until the end of the marking phase (though
2248   // it may not do so in the future).
2249   __ MaybeUnpoisonHeapReference(temp);
2250   // temp = temp->GetImtEntryAt(method_offset);
2251   __ movq(temp, Address(temp, method_offset));
2252   // call temp->GetEntryPoint();
2253   __ call(Address(temp,
2254                   ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64WordSize).SizeValue()));
2255 
2256   DCHECK(!codegen_->IsLeafMethod());
2257   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2258 }
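// Summary of the interface dispatch emitted above (illustrative sketch only):
//   movq rax, #dex_method_index            // hidden argument
//   movl temp, [receiver + class_offset]   // load receiver->klass_
//   (unpoison temp if heap poisoning is enabled)
//   movq temp, [temp + imt_entry_offset]   // ArtMethod* from the embedded IMT
//   call [temp + entry_point_offset]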
2259 
2260 void LocationsBuilderX86_64::VisitNeg(HNeg* neg) {
2261   LocationSummary* locations =
2262       new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall);
2263   switch (neg->GetResultType()) {
2264     case Primitive::kPrimInt:
2265     case Primitive::kPrimLong:
2266       locations->SetInAt(0, Location::RequiresRegister());
2267       locations->SetOut(Location::SameAsFirstInput());
2268       break;
2269 
2270     case Primitive::kPrimFloat:
2271     case Primitive::kPrimDouble:
2272       locations->SetInAt(0, Location::RequiresFpuRegister());
2273       locations->SetOut(Location::SameAsFirstInput());
2274       locations->AddTemp(Location::RequiresFpuRegister());
2275       break;
2276 
2277     default:
2278       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2279   }
2280 }
2281 
2282 void InstructionCodeGeneratorX86_64::VisitNeg(HNeg* neg) {
2283   LocationSummary* locations = neg->GetLocations();
2284   Location out = locations->Out();
2285   Location in = locations->InAt(0);
2286   switch (neg->GetResultType()) {
2287     case Primitive::kPrimInt:
2288       DCHECK(in.IsRegister());
2289       DCHECK(in.Equals(out));
2290       __ negl(out.AsRegister<CpuRegister>());
2291       break;
2292 
2293     case Primitive::kPrimLong:
2294       DCHECK(in.IsRegister());
2295       DCHECK(in.Equals(out));
2296       __ negq(out.AsRegister<CpuRegister>());
2297       break;
2298 
2299     case Primitive::kPrimFloat: {
2300       DCHECK(in.Equals(out));
2301       XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2302       // Implement float negation with an exclusive or with value
2303       // 0x80000000 (mask for bit 31, representing the sign of a
2304       // single-precision floating-point number).
2305       __ movss(mask, codegen_->LiteralInt32Address(0x80000000));
2306       __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
2307       break;
2308     }
2309 
2310     case Primitive::kPrimDouble: {
2311       DCHECK(in.Equals(out));
2312       XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2313       // Implement double negation with an exclusive or with value
2314       // 0x8000000000000000 (mask for bit 63, representing the sign of
2315       // a double-precision floating-point number).
2316       __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000)));
2317       __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
2318       break;
2319     }
2320 
2321     default:
2322       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2323   }
2324 }
2325 
2326 void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) {
2327   LocationSummary* locations =
2328       new (GetGraph()->GetArena()) LocationSummary(conversion, LocationSummary::kNoCall);
2329   Primitive::Type result_type = conversion->GetResultType();
2330   Primitive::Type input_type = conversion->GetInputType();
2331   DCHECK_NE(result_type, input_type);
2332 
2333   // The Java language does not allow treating boolean as an integral type but
2334   // our bit representation makes it safe.
2335 
2336   switch (result_type) {
2337     case Primitive::kPrimByte:
2338       switch (input_type) {
2339         case Primitive::kPrimLong:
2340           // Type conversion from long to byte is a result of code transformations.
2341         case Primitive::kPrimBoolean:
2342           // Boolean input is a result of code transformations.
2343         case Primitive::kPrimShort:
2344         case Primitive::kPrimInt:
2345         case Primitive::kPrimChar:
2346           // Processing a Dex `int-to-byte' instruction.
2347           locations->SetInAt(0, Location::Any());
2348           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2349           break;
2350 
2351         default:
2352           LOG(FATAL) << "Unexpected type conversion from " << input_type
2353                      << " to " << result_type;
2354       }
2355       break;
2356 
2357     case Primitive::kPrimShort:
2358       switch (input_type) {
2359         case Primitive::kPrimLong:
2360           // Type conversion from long to short is a result of code transformations.
2361         case Primitive::kPrimBoolean:
2362           // Boolean input is a result of code transformations.
2363         case Primitive::kPrimByte:
2364         case Primitive::kPrimInt:
2365         case Primitive::kPrimChar:
2366           // Processing a Dex `int-to-short' instruction.
2367           locations->SetInAt(0, Location::Any());
2368           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2369           break;
2370 
2371         default:
2372           LOG(FATAL) << "Unexpected type conversion from " << input_type
2373                      << " to " << result_type;
2374       }
2375       break;
2376 
2377     case Primitive::kPrimInt:
2378       switch (input_type) {
2379         case Primitive::kPrimLong:
2380           // Processing a Dex `long-to-int' instruction.
2381           locations->SetInAt(0, Location::Any());
2382           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2383           break;
2384 
2385         case Primitive::kPrimFloat:
2386           // Processing a Dex `float-to-int' instruction.
2387           locations->SetInAt(0, Location::RequiresFpuRegister());
2388           locations->SetOut(Location::RequiresRegister());
2389           break;
2390 
2391         case Primitive::kPrimDouble:
2392           // Processing a Dex `double-to-int' instruction.
2393           locations->SetInAt(0, Location::RequiresFpuRegister());
2394           locations->SetOut(Location::RequiresRegister());
2395           break;
2396 
2397         default:
2398           LOG(FATAL) << "Unexpected type conversion from " << input_type
2399                      << " to " << result_type;
2400       }
2401       break;
2402 
2403     case Primitive::kPrimLong:
2404       switch (input_type) {
2405         case Primitive::kPrimBoolean:
2406           // Boolean input is a result of code transformations.
2407         case Primitive::kPrimByte:
2408         case Primitive::kPrimShort:
2409         case Primitive::kPrimInt:
2410         case Primitive::kPrimChar:
2411           // Processing a Dex `int-to-long' instruction.
2412           // TODO: We would benefit from a (to-be-implemented)
2413           // Location::RegisterOrStackSlot requirement for this input.
2414           locations->SetInAt(0, Location::RequiresRegister());
2415           locations->SetOut(Location::RequiresRegister());
2416           break;
2417 
2418         case Primitive::kPrimFloat:
2419           // Processing a Dex `float-to-long' instruction.
2420           locations->SetInAt(0, Location::RequiresFpuRegister());
2421           locations->SetOut(Location::RequiresRegister());
2422           break;
2423 
2424         case Primitive::kPrimDouble:
2425           // Processing a Dex `double-to-long' instruction.
2426           locations->SetInAt(0, Location::RequiresFpuRegister());
2427           locations->SetOut(Location::RequiresRegister());
2428           break;
2429 
2430         default:
2431           LOG(FATAL) << "Unexpected type conversion from " << input_type
2432                      << " to " << result_type;
2433       }
2434       break;
2435 
2436     case Primitive::kPrimChar:
2437       switch (input_type) {
2438         case Primitive::kPrimLong:
2439           // Type conversion from long to char is a result of code transformations.
2440         case Primitive::kPrimBoolean:
2441           // Boolean input is a result of code transformations.
2442         case Primitive::kPrimByte:
2443         case Primitive::kPrimShort:
2444         case Primitive::kPrimInt:
2445           // Processing a Dex `int-to-char' instruction.
2446           locations->SetInAt(0, Location::Any());
2447           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2448           break;
2449 
2450         default:
2451           LOG(FATAL) << "Unexpected type conversion from " << input_type
2452                      << " to " << result_type;
2453       }
2454       break;
2455 
2456     case Primitive::kPrimFloat:
2457       switch (input_type) {
2458         case Primitive::kPrimBoolean:
2459           // Boolean input is a result of code transformations.
2460         case Primitive::kPrimByte:
2461         case Primitive::kPrimShort:
2462         case Primitive::kPrimInt:
2463         case Primitive::kPrimChar:
2464           // Processing a Dex `int-to-float' instruction.
2465           locations->SetInAt(0, Location::Any());
2466           locations->SetOut(Location::RequiresFpuRegister());
2467           break;
2468 
2469         case Primitive::kPrimLong:
2470           // Processing a Dex `long-to-float' instruction.
2471           locations->SetInAt(0, Location::Any());
2472           locations->SetOut(Location::RequiresFpuRegister());
2473           break;
2474 
2475         case Primitive::kPrimDouble:
2476           // Processing a Dex `double-to-float' instruction.
2477           locations->SetInAt(0, Location::Any());
2478           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2479           break;
2480 
2481         default:
2482           LOG(FATAL) << "Unexpected type conversion from " << input_type
2483                      << " to " << result_type;
2484       }
2485       break;
2486 
2487     case Primitive::kPrimDouble:
2488       switch (input_type) {
2489         case Primitive::kPrimBoolean:
2490           // Boolean input is a result of code transformations.
2491         case Primitive::kPrimByte:
2492         case Primitive::kPrimShort:
2493         case Primitive::kPrimInt:
2494         case Primitive::kPrimChar:
2495           // Processing a Dex `int-to-double' instruction.
2496           locations->SetInAt(0, Location::Any());
2497           locations->SetOut(Location::RequiresFpuRegister());
2498           break;
2499 
2500         case Primitive::kPrimLong:
2501           // Processing a Dex `long-to-double' instruction.
2502           locations->SetInAt(0, Location::Any());
2503           locations->SetOut(Location::RequiresFpuRegister());
2504           break;
2505 
2506         case Primitive::kPrimFloat:
2507           // Processing a Dex `float-to-double' instruction.
2508           locations->SetInAt(0, Location::Any());
2509           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2510           break;
2511 
2512         default:
2513           LOG(FATAL) << "Unexpected type conversion from " << input_type
2514                      << " to " << result_type;
2515       }
2516       break;
2517 
2518     default:
2519       LOG(FATAL) << "Unexpected type conversion from " << input_type
2520                  << " to " << result_type;
2521   }
2522 }
2523 
2524 void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conversion) {
2525   LocationSummary* locations = conversion->GetLocations();
2526   Location out = locations->Out();
2527   Location in = locations->InAt(0);
2528   Primitive::Type result_type = conversion->GetResultType();
2529   Primitive::Type input_type = conversion->GetInputType();
2530   DCHECK_NE(result_type, input_type);
2531   switch (result_type) {
2532     case Primitive::kPrimByte:
2533       switch (input_type) {
2534         case Primitive::kPrimLong:
2535           // Type conversion from long to byte is a result of code transformations.
2536         case Primitive::kPrimBoolean:
2537           // Boolean input is a result of code transformations.
2538         case Primitive::kPrimShort:
2539         case Primitive::kPrimInt:
2540         case Primitive::kPrimChar:
2541           // Processing a Dex `int-to-byte' instruction.
2542           if (in.IsRegister()) {
2543             __ movsxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2544           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
2545             __ movsxb(out.AsRegister<CpuRegister>(),
2546                       Address(CpuRegister(RSP), in.GetStackIndex()));
2547           } else {
2548             __ movl(out.AsRegister<CpuRegister>(),
2549                     Immediate(static_cast<int8_t>(Int64FromConstant(in.GetConstant()))));
2550           }
2551           break;
2552 
2553         default:
2554           LOG(FATAL) << "Unexpected type conversion from " << input_type
2555                      << " to " << result_type;
2556       }
2557       break;
2558 
2559     case Primitive::kPrimShort:
2560       switch (input_type) {
2561         case Primitive::kPrimLong:
2562           // Type conversion from long to short is a result of code transformations.
2563         case Primitive::kPrimBoolean:
2564           // Boolean input is a result of code transformations.
2565         case Primitive::kPrimByte:
2566         case Primitive::kPrimInt:
2567         case Primitive::kPrimChar:
2568           // Processing a Dex `int-to-short' instruction.
2569           if (in.IsRegister()) {
2570             __ movsxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2571           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
2572             __ movsxw(out.AsRegister<CpuRegister>(),
2573                       Address(CpuRegister(RSP), in.GetStackIndex()));
2574           } else {
2575             __ movl(out.AsRegister<CpuRegister>(),
2576                     Immediate(static_cast<int16_t>(Int64FromConstant(in.GetConstant()))));
2577           }
2578           break;
2579 
2580         default:
2581           LOG(FATAL) << "Unexpected type conversion from " << input_type
2582                      << " to " << result_type;
2583       }
2584       break;
2585 
2586     case Primitive::kPrimInt:
2587       switch (input_type) {
2588         case Primitive::kPrimLong:
2589           // Processing a Dex `long-to-int' instruction.
2590           if (in.IsRegister()) {
2591             __ movl(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2592           } else if (in.IsDoubleStackSlot()) {
2593             __ movl(out.AsRegister<CpuRegister>(),
2594                     Address(CpuRegister(RSP), in.GetStackIndex()));
2595           } else {
2596             DCHECK(in.IsConstant());
2597             DCHECK(in.GetConstant()->IsLongConstant());
2598             int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
2599             __ movl(out.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
2600           }
2601           break;
2602 
2603         case Primitive::kPrimFloat: {
2604           // Processing a Dex `float-to-int' instruction.
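          // Note: this sequence implements Java's narrowing semantics: NaN converts to 0,
          // positive overflow clamps to Integer.MAX_VALUE, and negative overflow relies on
          // cvttss2si producing 0x80000000, which is already Integer.MIN_VALUE. The trailing
          // bool argument of cvttss2si selects a 64-bit destination and is used by the long
          // conversions below; the double-to-int and float/double-to-long cases follow the
          // same pattern.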
2605           XmmRegister input = in.AsFpuRegister<XmmRegister>();
2606           CpuRegister output = out.AsRegister<CpuRegister>();
2607           NearLabel done, nan;
2608 
2609           __ movl(output, Immediate(kPrimIntMax));
2610           // if input >= (float)INT_MAX goto done
2611           __ comiss(input, codegen_->LiteralFloatAddress(kPrimIntMax));
2612           __ j(kAboveEqual, &done);
2613           // if input == NaN goto nan
2614           __ j(kUnordered, &nan);
2615           // output = float-to-int-truncate(input)
2616           __ cvttss2si(output, input, false);
2617           __ jmp(&done);
2618           __ Bind(&nan);
2619           //  output = 0
2620           __ xorl(output, output);
2621           __ Bind(&done);
2622           break;
2623         }
2624 
2625         case Primitive::kPrimDouble: {
2626           // Processing a Dex `double-to-int' instruction.
2627           XmmRegister input = in.AsFpuRegister<XmmRegister>();
2628           CpuRegister output = out.AsRegister<CpuRegister>();
2629           NearLabel done, nan;
2630 
2631           __ movl(output, Immediate(kPrimIntMax));
2632           // if input >= (double)INT_MAX goto done
2633           __ comisd(input, codegen_->LiteralDoubleAddress(kPrimIntMax));
2634           __ j(kAboveEqual, &done);
2635           // if input == NaN goto nan
2636           __ j(kUnordered, &nan);
2637           // output = double-to-int-truncate(input)
2638           __ cvttsd2si(output, input);
2639           __ jmp(&done);
2640           __ Bind(&nan);
2641           //  output = 0
2642           __ xorl(output, output);
2643           __ Bind(&done);
2644           break;
2645         }
2646 
2647         default:
2648           LOG(FATAL) << "Unexpected type conversion from " << input_type
2649                      << " to " << result_type;
2650       }
2651       break;
2652 
2653     case Primitive::kPrimLong:
2654       DCHECK(out.IsRegister());
2655       switch (input_type) {
2656         case Primitive::kPrimBoolean:
2657           // Boolean input is a result of code transformations.
2658         case Primitive::kPrimByte:
2659         case Primitive::kPrimShort:
2660         case Primitive::kPrimInt:
2661         case Primitive::kPrimChar:
2662           // Processing a Dex `int-to-long' instruction.
2663           DCHECK(in.IsRegister());
2664           __ movsxd(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2665           break;
2666 
2667         case Primitive::kPrimFloat: {
2668           // Processing a Dex `float-to-long' instruction.
2669           XmmRegister input = in.AsFpuRegister<XmmRegister>();
2670           CpuRegister output = out.AsRegister<CpuRegister>();
2671           NearLabel done, nan;
2672 
2673           codegen_->Load64BitValue(output, kPrimLongMax);
2674           // if input >= (float)LONG_MAX goto done
2675           __ comiss(input, codegen_->LiteralFloatAddress(kPrimLongMax));
2676           __ j(kAboveEqual, &done);
2677           // if input == NaN goto nan
2678           __ j(kUnordered, &nan);
2679           // output = float-to-long-truncate(input)
2680           __ cvttss2si(output, input, true);
2681           __ jmp(&done);
2682           __ Bind(&nan);
2683           //  output = 0
2684           __ xorl(output, output);
2685           __ Bind(&done);
2686           break;
2687         }
2688 
2689         case Primitive::kPrimDouble: {
2690           // Processing a Dex `double-to-long' instruction.
2691           XmmRegister input = in.AsFpuRegister<XmmRegister>();
2692           CpuRegister output = out.AsRegister<CpuRegister>();
2693           NearLabel done, nan;
2694 
2695           codegen_->Load64BitValue(output, kPrimLongMax);
2696           // if input >= (double)LONG_MAX goto done
2697           __ comisd(input, codegen_->LiteralDoubleAddress(kPrimLongMax));
2698           __ j(kAboveEqual, &done);
2699           // if input == NaN goto nan
2700           __ j(kUnordered, &nan);
2701           // output = double-to-long-truncate(input)
2702           __ cvttsd2si(output, input, true);
2703           __ jmp(&done);
2704           __ Bind(&nan);
2705           //  output = 0
2706           __ xorl(output, output);
2707           __ Bind(&done);
2708           break;
2709         }
2710 
2711         default:
2712           LOG(FATAL) << "Unexpected type conversion from " << input_type
2713                      << " to " << result_type;
2714       }
2715       break;
2716 
2717     case Primitive::kPrimChar:
2718       switch (input_type) {
2719         case Primitive::kPrimLong:
2720           // Type conversion from long to char is a result of code transformations.
2721         case Primitive::kPrimBoolean:
2722           // Boolean input is a result of code transformations.
2723         case Primitive::kPrimByte:
2724         case Primitive::kPrimShort:
2725         case Primitive::kPrimInt:
2726           // Processing a Dex `int-to-char' instruction.
2727           if (in.IsRegister()) {
2728             __ movzxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2729           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
2730             __ movzxw(out.AsRegister<CpuRegister>(),
2731                       Address(CpuRegister(RSP), in.GetStackIndex()));
2732           } else {
2733             __ movl(out.AsRegister<CpuRegister>(),
2734                     Immediate(static_cast<uint16_t>(Int64FromConstant(in.GetConstant()))));
2735           }
2736           break;
2737 
2738         default:
2739           LOG(FATAL) << "Unexpected type conversion from " << input_type
2740                      << " to " << result_type;
2741       }
2742       break;
2743 
2744     case Primitive::kPrimFloat:
2745       switch (input_type) {
2746         case Primitive::kPrimBoolean:
2747           // Boolean input is a result of code transformations.
2748         case Primitive::kPrimByte:
2749         case Primitive::kPrimShort:
2750         case Primitive::kPrimInt:
2751         case Primitive::kPrimChar:
2752           // Processing a Dex `int-to-float' instruction.
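          // For constant inputs the conversion is folded here at compile time and the
          // resulting literal is materialized directly via Load32BitValue/Load64BitValue,
          // as in the IsConstant() branches of this and the following cases.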
2753           if (in.IsRegister()) {
2754             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
2755           } else if (in.IsConstant()) {
2756             int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
2757             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
2758             codegen_->Load32BitValue(dest, static_cast<float>(v));
2759           } else {
2760             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
2761                         Address(CpuRegister(RSP), in.GetStackIndex()), false);
2762           }
2763           break;
2764 
2765         case Primitive::kPrimLong:
2766           // Processing a Dex `long-to-float' instruction.
2767           if (in.IsRegister()) {
2768             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
2769           } else if (in.IsConstant()) {
2770             int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
2771             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
2772             codegen_->Load32BitValue(dest, static_cast<float>(v));
2773           } else {
2774             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
2775                         Address(CpuRegister(RSP), in.GetStackIndex()), true);
2776           }
2777           break;
2778 
2779         case Primitive::kPrimDouble:
2780           // Processing a Dex `double-to-float' instruction.
2781           if (in.IsFpuRegister()) {
2782             __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
2783           } else if (in.IsConstant()) {
2784             double v = in.GetConstant()->AsDoubleConstant()->GetValue();
2785             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
2786             codegen_->Load32BitValue(dest, static_cast<float>(v));
2787           } else {
2788             __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(),
2789                         Address(CpuRegister(RSP), in.GetStackIndex()));
2790           }
2791           break;
2792 
2793         default:
2794           LOG(FATAL) << "Unexpected type conversion from " << input_type
2795                      << " to " << result_type;
2796       }
2797       break;
2798 
2799     case Primitive::kPrimDouble:
2800       switch (input_type) {
2801         case Primitive::kPrimBoolean:
2802           // Boolean input is a result of code transformations.
2803         case Primitive::kPrimByte:
2804         case Primitive::kPrimShort:
2805         case Primitive::kPrimInt:
2806         case Primitive::kPrimChar:
2807           // Processing a Dex `int-to-double' instruction.
2808           if (in.IsRegister()) {
2809             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
2810           } else if (in.IsConstant()) {
2811             int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
2812             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
2813             codegen_->Load64BitValue(dest, static_cast<double>(v));
2814           } else {
2815             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
2816                         Address(CpuRegister(RSP), in.GetStackIndex()), false);
2817           }
2818           break;
2819 
2820         case Primitive::kPrimLong:
2821           // Processing a Dex `long-to-double' instruction.
2822           if (in.IsRegister()) {
2823             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
2824           } else if (in.IsConstant()) {
2825             int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
2826             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
2827             codegen_->Load64BitValue(dest, static_cast<double>(v));
2828           } else {
2829             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
2830                         Address(CpuRegister(RSP), in.GetStackIndex()), true);
2831           }
2832           break;
2833 
2834         case Primitive::kPrimFloat:
2835           // Processing a Dex `float-to-double' instruction.
2836           if (in.IsFpuRegister()) {
2837             __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
2838           } else if (in.IsConstant()) {
2839             float v = in.GetConstant()->AsFloatConstant()->GetValue();
2840             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
2841             codegen_->Load64BitValue(dest, static_cast<double>(v));
2842           } else {
2843             __ cvtss2sd(out.AsFpuRegister<XmmRegister>(),
2844                         Address(CpuRegister(RSP), in.GetStackIndex()));
2845           }
2846           break;
2847 
2848         default:
2849           LOG(FATAL) << "Unexpected type conversion from " << input_type
2850                      << " to " << result_type;
2851       }
2852       break;
2853 
2854     default:
2855       LOG(FATAL) << "Unexpected type conversion from " << input_type
2856                  << " to " << result_type;
2857   }
2858 }
2859 
2860 void LocationsBuilderX86_64::VisitAdd(HAdd* add) {
2861   LocationSummary* locations =
2862       new (GetGraph()->GetArena()) LocationSummary(add, LocationSummary::kNoCall);
2863   switch (add->GetResultType()) {
2864     case Primitive::kPrimInt: {
2865       locations->SetInAt(0, Location::RequiresRegister());
2866       locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
2867       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2868       break;
2869     }
2870 
2871     case Primitive::kPrimLong: {
2872       locations->SetInAt(0, Location::RequiresRegister());
2873       // We can use a leaq or addq if the constant can fit in an immediate.
2874       locations->SetInAt(1, Location::RegisterOrInt32Constant(add->InputAt(1)));
2875       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2876       break;
2877     }
2878 
2879     case Primitive::kPrimDouble:
2880     case Primitive::kPrimFloat: {
2881       locations->SetInAt(0, Location::RequiresFpuRegister());
2882       locations->SetInAt(1, Location::Any());
2883       locations->SetOut(Location::SameAsFirstInput());
2884       break;
2885     }
2886 
2887     default:
2888       LOG(FATAL) << "Unexpected add type " << add->GetResultType();
2889   }
2890 }
2891 
2892 void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) {
2893   LocationSummary* locations = add->GetLocations();
2894   Location first = locations->InAt(0);
2895   Location second = locations->InAt(1);
2896   Location out = locations->Out();
2897 
2898   switch (add->GetResultType()) {
2899     case Primitive::kPrimInt: {
2900       if (second.IsRegister()) {
2901         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
2902           __ addl(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
2903         } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
2904           __ addl(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
2905         } else {
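          // Neither input aliases the output, so a single LEA computes first + second
          // without clobbering either input register.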
2906           __ leal(out.AsRegister<CpuRegister>(), Address(
2907               first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
2908         }
2909       } else if (second.IsConstant()) {
2910         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
2911           __ addl(out.AsRegister<CpuRegister>(),
2912                   Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
2913         } else {
2914           __ leal(out.AsRegister<CpuRegister>(), Address(
2915               first.AsRegister<CpuRegister>(), second.GetConstant()->AsIntConstant()->GetValue()));
2916         }
2917       } else {
2918         DCHECK(first.Equals(locations->Out()));
2919         __ addl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
2920       }
2921       break;
2922     }
2923 
2924     case Primitive::kPrimLong: {
2925       if (second.IsRegister()) {
2926         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
2927           __ addq(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
2928         } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
2929           __ addq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
2930         } else {
2931           __ leaq(out.AsRegister<CpuRegister>(), Address(
2932               first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
2933         }
2934       } else {
2935         DCHECK(second.IsConstant());
2936         int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
2937         int32_t int32_value = Low32Bits(value);
2938         DCHECK_EQ(int32_value, value);
2939         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
2940           __ addq(out.AsRegister<CpuRegister>(), Immediate(int32_value));
2941         } else {
2942           __ leaq(out.AsRegister<CpuRegister>(), Address(
2943               first.AsRegister<CpuRegister>(), int32_value));
2944         }
2945       }
2946       break;
2947     }
2948 
2949     case Primitive::kPrimFloat: {
2950       if (second.IsFpuRegister()) {
2951         __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
2952       } else if (second.IsConstant()) {
2953         __ addss(first.AsFpuRegister<XmmRegister>(),
2954                  codegen_->LiteralFloatAddress(
2955                      second.GetConstant()->AsFloatConstant()->GetValue()));
2956       } else {
2957         DCHECK(second.IsStackSlot());
2958         __ addss(first.AsFpuRegister<XmmRegister>(),
2959                  Address(CpuRegister(RSP), second.GetStackIndex()));
2960       }
2961       break;
2962     }
2963 
2964     case Primitive::kPrimDouble: {
2965       if (second.IsFpuRegister()) {
2966         __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
2967       } else if (second.IsConstant()) {
2968         __ addsd(first.AsFpuRegister<XmmRegister>(),
2969                  codegen_->LiteralDoubleAddress(
2970                      second.GetConstant()->AsDoubleConstant()->GetValue()));
2971       } else {
2972         DCHECK(second.IsDoubleStackSlot());
2973         __ addsd(first.AsFpuRegister<XmmRegister>(),
2974                  Address(CpuRegister(RSP), second.GetStackIndex()));
2975       }
2976       break;
2977     }
2978 
2979     default:
2980       LOG(FATAL) << "Unexpected add type " << add->GetResultType();
2981   }
2982 }
2983 
2984 void LocationsBuilderX86_64::VisitSub(HSub* sub) {
2985   LocationSummary* locations =
2986       new (GetGraph()->GetArena()) LocationSummary(sub, LocationSummary::kNoCall);
2987   switch (sub->GetResultType()) {
2988     case Primitive::kPrimInt: {
2989       locations->SetInAt(0, Location::RequiresRegister());
2990       locations->SetInAt(1, Location::Any());
2991       locations->SetOut(Location::SameAsFirstInput());
2992       break;
2993     }
2994     case Primitive::kPrimLong: {
2995       locations->SetInAt(0, Location::RequiresRegister());
2996       locations->SetInAt(1, Location::RegisterOrInt32Constant(sub->InputAt(1)));
2997       locations->SetOut(Location::SameAsFirstInput());
2998       break;
2999     }
3000     case Primitive::kPrimFloat:
3001     case Primitive::kPrimDouble: {
3002       locations->SetInAt(0, Location::RequiresFpuRegister());
3003       locations->SetInAt(1, Location::Any());
3004       locations->SetOut(Location::SameAsFirstInput());
3005       break;
3006     }
3007     default:
3008       LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3009   }
3010 }
3011 
3012 void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) {
3013   LocationSummary* locations = sub->GetLocations();
3014   Location first = locations->InAt(0);
3015   Location second = locations->InAt(1);
3016   DCHECK(first.Equals(locations->Out()));
3017   switch (sub->GetResultType()) {
3018     case Primitive::kPrimInt: {
3019       if (second.IsRegister()) {
3020         __ subl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3021       } else if (second.IsConstant()) {
3022         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
3023         __ subl(first.AsRegister<CpuRegister>(), imm);
3024       } else {
3025         __ subl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
3026       }
3027       break;
3028     }
3029     case Primitive::kPrimLong: {
3030       if (second.IsConstant()) {
3031         int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3032         DCHECK(IsInt<32>(value));
3033         __ subq(first.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
3034       } else {
3035         __ subq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3036       }
3037       break;
3038     }
3039 
3040     case Primitive::kPrimFloat: {
3041       if (second.IsFpuRegister()) {
3042         __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3043       } else if (second.IsConstant()) {
3044         __ subss(first.AsFpuRegister<XmmRegister>(),
3045                  codegen_->LiteralFloatAddress(
3046                      second.GetConstant()->AsFloatConstant()->GetValue()));
3047       } else {
3048         DCHECK(second.IsStackSlot());
3049         __ subss(first.AsFpuRegister<XmmRegister>(),
3050                  Address(CpuRegister(RSP), second.GetStackIndex()));
3051       }
3052       break;
3053     }
3054 
3055     case Primitive::kPrimDouble: {
3056       if (second.IsFpuRegister()) {
3057         __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3058       } else if (second.IsConstant()) {
3059         __ subsd(first.AsFpuRegister<XmmRegister>(),
3060                  codegen_->LiteralDoubleAddress(
3061                      second.GetConstant()->AsDoubleConstant()->GetValue()));
3062       } else {
3063         DCHECK(second.IsDoubleStackSlot());
3064         __ subsd(first.AsFpuRegister<XmmRegister>(),
3065                  Address(CpuRegister(RSP), second.GetStackIndex()));
3066       }
3067       break;
3068     }
3069 
3070     default:
3071       LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3072   }
3073 }
3074 
3075 void LocationsBuilderX86_64::VisitMul(HMul* mul) {
3076   LocationSummary* locations =
3077       new (GetGraph()->GetArena()) LocationSummary(mul, LocationSummary::kNoCall);
3078   switch (mul->GetResultType()) {
3079     case Primitive::kPrimInt: {
3080       locations->SetInAt(0, Location::RequiresRegister());
3081       locations->SetInAt(1, Location::Any());
3082       if (mul->InputAt(1)->IsIntConstant()) {
3083         // Can use 3 operand multiply.
3084         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3085       } else {
3086         locations->SetOut(Location::SameAsFirstInput());
3087       }
3088       break;
3089     }
3090     case Primitive::kPrimLong: {
3091       locations->SetInAt(0, Location::RequiresRegister());
3092       locations->SetInAt(1, Location::Any());
3093       if (mul->InputAt(1)->IsLongConstant() &&
3094           IsInt<32>(mul->InputAt(1)->AsLongConstant()->GetValue())) {
3095         // Can use 3 operand multiply.
3096         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3097       } else {
3098         locations->SetOut(Location::SameAsFirstInput());
3099       }
3100       break;
3101     }
3102     case Primitive::kPrimFloat:
3103     case Primitive::kPrimDouble: {
3104       locations->SetInAt(0, Location::RequiresFpuRegister());
3105       locations->SetInAt(1, Location::Any());
3106       locations->SetOut(Location::SameAsFirstInput());
3107       break;
3108     }
3109 
3110     default:
3111       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3112   }
3113 }
3114 
3115 void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) {
3116   LocationSummary* locations = mul->GetLocations();
3117   Location first = locations->InAt(0);
3118   Location second = locations->InAt(1);
3119   Location out = locations->Out();
3120   switch (mul->GetResultType()) {
3121     case Primitive::kPrimInt:
3122       // The constant may have ended up in a register, so test explicitly to avoid
3123       // problems where the output may not be the same as the first operand.
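      // (imull also has a three-operand register/immediate form, so in that case the
      // output does not need to alias the first input.)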
3124       if (mul->InputAt(1)->IsIntConstant()) {
3125         Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
3126         __ imull(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), imm);
3127       } else if (second.IsRegister()) {
3128         DCHECK(first.Equals(out));
3129         __ imull(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3130       } else {
3131         DCHECK(first.Equals(out));
3132         DCHECK(second.IsStackSlot());
3133         __ imull(first.AsRegister<CpuRegister>(),
3134                  Address(CpuRegister(RSP), second.GetStackIndex()));
3135       }
3136       break;
3137     case Primitive::kPrimLong: {
3138       // The constant may have ended up in a register, so test explicitly to avoid
3139       // problems where the output may not be the same as the first operand.
3140       if (mul->InputAt(1)->IsLongConstant()) {
3141         int64_t value = mul->InputAt(1)->AsLongConstant()->GetValue();
3142         if (IsInt<32>(value)) {
3143           __ imulq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(),
3144                    Immediate(static_cast<int32_t>(value)));
3145         } else {
3146           // Have to use the constant area.
3147           DCHECK(first.Equals(out));
3148           __ imulq(first.AsRegister<CpuRegister>(), codegen_->LiteralInt64Address(value));
3149         }
3150       } else if (second.IsRegister()) {
3151         DCHECK(first.Equals(out));
3152         __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3153       } else {
3154         DCHECK(second.IsDoubleStackSlot());
3155         DCHECK(first.Equals(out));
3156         __ imulq(first.AsRegister<CpuRegister>(),
3157                  Address(CpuRegister(RSP), second.GetStackIndex()));
3158       }
3159       break;
3160     }
3161 
3162     case Primitive::kPrimFloat: {
3163       DCHECK(first.Equals(out));
3164       if (second.IsFpuRegister()) {
3165         __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3166       } else if (second.IsConstant()) {
3167         __ mulss(first.AsFpuRegister<XmmRegister>(),
3168                  codegen_->LiteralFloatAddress(
3169                      second.GetConstant()->AsFloatConstant()->GetValue()));
3170       } else {
3171         DCHECK(second.IsStackSlot());
3172         __ mulss(first.AsFpuRegister<XmmRegister>(),
3173                  Address(CpuRegister(RSP), second.GetStackIndex()));
3174       }
3175       break;
3176     }
3177 
3178     case Primitive::kPrimDouble: {
3179       DCHECK(first.Equals(out));
3180       if (second.IsFpuRegister()) {
3181         __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3182       } else if (second.IsConstant()) {
3183         __ mulsd(first.AsFpuRegister<XmmRegister>(),
3184                  codegen_->LiteralDoubleAddress(
3185                      second.GetConstant()->AsDoubleConstant()->GetValue()));
3186       } else {
3187         DCHECK(second.IsDoubleStackSlot());
3188         __ mulsd(first.AsFpuRegister<XmmRegister>(),
3189                  Address(CpuRegister(RSP), second.GetStackIndex()));
3190       }
3191       break;
3192     }
3193 
3194     default:
3195       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3196   }
3197 }
3198 
3199 void InstructionCodeGeneratorX86_64::PushOntoFPStack(Location source, uint32_t temp_offset,
3200                                                      uint32_t stack_adjustment, bool is_float) {
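  // Note: the x87 load instructions used below (flds/fldl) only take memory operands,
  // so a source that lives in a register or constant is first spilled to a stack temp.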
3201   if (source.IsStackSlot()) {
3202     DCHECK(is_float);
3203     __ flds(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
3204   } else if (source.IsDoubleStackSlot()) {
3205     DCHECK(!is_float);
3206     __ fldl(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
3207   } else {
3208     // Write the value to the temporary location on the stack and load to FP stack.
3209     if (is_float) {
3210       Location stack_temp = Location::StackSlot(temp_offset);
3211       codegen_->Move(stack_temp, source);
3212       __ flds(Address(CpuRegister(RSP), temp_offset));
3213     } else {
3214       Location stack_temp = Location::DoubleStackSlot(temp_offset);
3215       codegen_->Move(stack_temp, source);
3216       __ fldl(Address(CpuRegister(RSP), temp_offset));
3217     }
3218   }
3219 }
3220 
GenerateRemFP(HRem * rem)3221 void InstructionCodeGeneratorX86_64::GenerateRemFP(HRem *rem) {
3222   Primitive::Type type = rem->GetResultType();
3223   bool is_float = type == Primitive::kPrimFloat;
3224   size_t elem_size = Primitive::ComponentSize(type);
3225   LocationSummary* locations = rem->GetLocations();
3226   Location first = locations->InAt(0);
3227   Location second = locations->InAt(1);
3228   Location out = locations->Out();
3229 
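  // SSE has no floating-point remainder instruction, and Java's '%' on floats/doubles
  // keeps the sign of the dividend (truncating division), which matches x87 FPREM, so
  // the operands take a round trip through the x87 stack here.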
3230   // Create stack space for 2 elements.
3231   // TODO: enhance register allocator to ask for stack temporaries.
3232   __ subq(CpuRegister(RSP), Immediate(2 * elem_size));
3233 
3234   // Load the values to the FP stack in reverse order, using temporaries if needed.
3235   PushOntoFPStack(second, elem_size, 2 * elem_size, is_float);
3236   PushOntoFPStack(first, 0, 2 * elem_size, is_float);
3237 
3238   // Loop doing FPREM until we stabilize.
3239   NearLabel retry;
3240   __ Bind(&retry);
3241   __ fprem();
3242 
3243   // Move FP status to AX.
3244   __ fstsw();
3245 
3246   // And see if the argument reduction is complete. This is signaled by the
3247   // C2 FPU flag bit set to 0.
3248   __ andl(CpuRegister(RAX), Immediate(kC2ConditionMask));
3249   __ j(kNotEqual, &retry);
3250 
3251   // We have settled on the final value. Retrieve it into an XMM register.
3252   // Store FP top of stack to real stack.
3253   if (is_float) {
3254     __ fsts(Address(CpuRegister(RSP), 0));
3255   } else {
3256     __ fstl(Address(CpuRegister(RSP), 0));
3257   }
3258 
3259   // Pop the 2 items from the FP stack.
3260   __ fucompp();
3261 
3262   // Load the value from the stack into an XMM register.
3263   DCHECK(out.IsFpuRegister()) << out;
3264   if (is_float) {
3265     __ movss(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
3266   } else {
3267     __ movsd(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
3268   }
3269 
3270   // And remove the temporary stack space we allocated.
3271   __ addq(CpuRegister(RSP), Immediate(2 * elem_size));
3272 }
3273 
3274 void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
3275   DCHECK(instruction->IsDiv() || instruction->IsRem());
3276 
3277   LocationSummary* locations = instruction->GetLocations();
3278   Location second = locations->InAt(1);
3279   DCHECK(second.IsConstant());
3280 
3281   CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
3282   CpuRegister input_register = locations->InAt(0).AsRegister<CpuRegister>();
3283   int64_t imm = Int64FromConstant(second.GetConstant());
3284 
3285   DCHECK(imm == 1 || imm == -1);
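  // x / 1 == x, x / -1 == -x, and x % 1 == x % -1 == 0, so no division is emitted.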
3286 
3287   switch (instruction->GetResultType()) {
3288     case Primitive::kPrimInt: {
3289       if (instruction->IsRem()) {
3290         __ xorl(output_register, output_register);
3291       } else {
3292         __ movl(output_register, input_register);
3293         if (imm == -1) {
3294           __ negl(output_register);
3295         }
3296       }
3297       break;
3298     }
3299 
3300     case Primitive::kPrimLong: {
3301       if (instruction->IsRem()) {
3302         __ xorl(output_register, output_register);
3303       } else {
3304         __ movq(output_register, input_register);
3305         if (imm == -1) {
3306           __ negq(output_register);
3307         }
3308       }
3309       break;
3310     }
3311 
3312     default:
3313       LOG(FATAL) << "Unexpected type for div by (-)1 " << instruction->GetResultType();
3314   }
3315 }
3316 
3317 void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
3318   LocationSummary* locations = instruction->GetLocations();
3319   Location second = locations->InAt(1);
3320 
3321   CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
3322   CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
3323 
3324   int64_t imm = Int64FromConstant(second.GetConstant());
3325   DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
3326   uint64_t abs_imm = AbsOrMin(imm);
3327 
3328   CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
3329 
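  // Signed division by 2^k must round toward zero, but an arithmetic shift alone rounds
  // toward negative infinity, so (2^k - 1) is added to negative numerators first. The
  // lea/test/cmov sequence applies that bias branchlessly; e.g. for imm == 8:
  //   tmp = (num < 0) ? num + 7 : num;  result = tmp >> 3  (negated when imm < 0).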
3330   if (instruction->GetResultType() == Primitive::kPrimInt) {
3331     __ leal(tmp, Address(numerator, abs_imm - 1));
3332     __ testl(numerator, numerator);
3333     __ cmov(kGreaterEqual, tmp, numerator);
3334     int shift = CTZ(imm);
3335     __ sarl(tmp, Immediate(shift));
3336 
3337     if (imm < 0) {
3338       __ negl(tmp);
3339     }
3340 
3341     __ movl(output_register, tmp);
3342   } else {
3343     DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
3344     CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
3345 
3346     codegen_->Load64BitValue(rdx, abs_imm - 1);
3347     __ addq(rdx, numerator);
3348     __ testq(numerator, numerator);
3349     __ cmov(kGreaterEqual, rdx, numerator);
3350     int shift = CTZ(imm);
3351     __ sarq(rdx, Immediate(shift));
3352 
3353     if (imm < 0) {
3354       __ negq(rdx);
3355     }
3356 
3357     __ movq(output_register, rdx);
3358   }
3359 }
3360 
3361 void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
3362   DCHECK(instruction->IsDiv() || instruction->IsRem());
3363 
3364   LocationSummary* locations = instruction->GetLocations();
3365   Location second = locations->InAt(1);
3366 
3367   CpuRegister numerator = instruction->IsDiv() ? locations->GetTemp(1).AsRegister<CpuRegister>()
3368       : locations->GetTemp(0).AsRegister<CpuRegister>();
3369   CpuRegister eax = locations->InAt(0).AsRegister<CpuRegister>();
3370   CpuRegister edx = instruction->IsDiv() ? locations->GetTemp(0).AsRegister<CpuRegister>()
3371       : locations->Out().AsRegister<CpuRegister>();
3372   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3373 
3374   DCHECK_EQ(RAX, eax.AsRegister());
3375   DCHECK_EQ(RDX, edx.AsRegister());
3376   if (instruction->IsDiv()) {
3377     DCHECK_EQ(RAX, out.AsRegister());
3378   } else {
3379     DCHECK_EQ(RDX, out.AsRegister());
3380   }
3381 
3382   int64_t magic;
3383   int shift;
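  // The constant division is strength-reduced to a multiplication by a precomputed
  // fixed-point reciprocal ("magic number") followed by shifts and sign corrections,
  // in the style of Granlund & Montgomery / Hacker's Delight. For example, a 32-bit
  // division by 7 typically uses magic 0x92492493 with shift 2.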
3384 
3385   // TODO: can these branches be written as one?
3386   if (instruction->GetResultType() == Primitive::kPrimInt) {
3387     int imm = second.GetConstant()->AsIntConstant()->GetValue();
3388 
3389     CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift);
3390 
3391     __ movl(numerator, eax);
3392 
3393     NearLabel no_div;
3394     NearLabel end;
3395     __ testl(eax, eax);
3396     __ j(kNotEqual, &no_div);
3397 
3398     __ xorl(out, out);
3399     __ jmp(&end);
3400 
3401     __ Bind(&no_div);
3402 
3403     __ movl(eax, Immediate(magic));
3404     __ imull(numerator);
3405 
3406     if (imm > 0 && magic < 0) {
3407       __ addl(edx, numerator);
3408     } else if (imm < 0 && magic > 0) {
3409       __ subl(edx, numerator);
3410     }
3411 
3412     if (shift != 0) {
3413       __ sarl(edx, Immediate(shift));
3414     }
3415 
3416     __ movl(eax, edx);
3417     __ shrl(edx, Immediate(31));
3418     __ addl(edx, eax);
3419 
3420     if (instruction->IsRem()) {
3421       __ movl(eax, numerator);
3422       __ imull(edx, Immediate(imm));
3423       __ subl(eax, edx);
3424       __ movl(edx, eax);
3425     } else {
3426       __ movl(eax, edx);
3427     }
3428     __ Bind(&end);
3429   } else {
3430     int64_t imm = second.GetConstant()->AsLongConstant()->GetValue();
3431 
3432     DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
3433 
3434     CpuRegister rax = eax;
3435     CpuRegister rdx = edx;
3436 
3437     CalculateMagicAndShiftForDivRem(imm, true /* is_long */, &magic, &shift);
3438 
3439     // Save the numerator.
3440     __ movq(numerator, rax);
3441 
3442     // RAX = magic
3443     codegen_->Load64BitValue(rax, magic);
3444 
3445     // RDX:RAX = magic * numerator
3446     __ imulq(numerator);
3447 
3448     if (imm > 0 && magic < 0) {
3449       // RDX += numerator
3450       __ addq(rdx, numerator);
3451     } else if (imm < 0 && magic > 0) {
3452       // RDX -= numerator
3453       __ subq(rdx, numerator);
3454     }
3455 
3456     // Shift if needed.
3457     if (shift != 0) {
3458       __ sarq(rdx, Immediate(shift));
3459     }
3460 
3461     // RDX += 1 if RDX < 0
3462     __ movq(rax, rdx);
3463     __ shrq(rdx, Immediate(63));
3464     __ addq(rdx, rax);
3465 
3466     if (instruction->IsRem()) {
3467       __ movq(rax, numerator);
3468 
3469       if (IsInt<32>(imm)) {
3470         __ imulq(rdx, Immediate(static_cast<int32_t>(imm)));
3471       } else {
3472         __ imulq(rdx, codegen_->LiteralInt64Address(imm));
3473       }
3474 
3475       __ subq(rax, rdx);
3476       __ movq(rdx, rax);
3477     } else {
3478       __ movq(rax, rdx);
3479     }
3480   }
3481 }
3482 
3483 void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
3484   DCHECK(instruction->IsDiv() || instruction->IsRem());
3485   Primitive::Type type = instruction->GetResultType();
3486   DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
3487 
3488   bool is_div = instruction->IsDiv();
3489   LocationSummary* locations = instruction->GetLocations();
3490 
3491   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3492   Location second = locations->InAt(1);
3493 
3494   DCHECK_EQ(RAX, locations->InAt(0).AsRegister<CpuRegister>().AsRegister());
3495   DCHECK_EQ(is_div ? RAX : RDX, out.AsRegister());
3496 
3497   if (second.IsConstant()) {
3498     int64_t imm = Int64FromConstant(second.GetConstant());
3499 
3500     if (imm == 0) {
3501       // Do not generate anything. DivZeroCheck would prevent any code to be executed.
3502     } else if (imm == 1 || imm == -1) {
3503       DivRemOneOrMinusOne(instruction);
3504     } else if (instruction->IsDiv() && IsPowerOfTwo(AbsOrMin(imm))) {
3505       DivByPowerOfTwo(instruction->AsDiv());
3506     } else {
3507       DCHECK(imm <= -2 || imm >= 2);
3508       GenerateDivRemWithAnyConstant(instruction);
3509     }
3510   } else {
3511     SlowPathCode* slow_path =
3512         new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86_64(
3513             instruction, out.AsRegister(), type, is_div);
3514     codegen_->AddSlowPath(slow_path);
3515 
3516     CpuRegister second_reg = second.AsRegister<CpuRegister>();
3517     // 0x80000000(00000000)/-1 triggers an arithmetic exception!
3518     // Dividing by -1 is actually negation and -0x80000000(00000000) = 0x80000000(00000000)
3519     // so it's safe to just use negl instead of more complex comparisons.
3520     if (type == Primitive::kPrimInt) {
3521       __ cmpl(second_reg, Immediate(-1));
3522       __ j(kEqual, slow_path->GetEntryLabel());
3523       // edx:eax <- sign-extended of eax
3524       __ cdq();
3525       // eax = quotient, edx = remainder
3526       __ idivl(second_reg);
3527     } else {
3528       __ cmpq(second_reg, Immediate(-1));
3529       __ j(kEqual, slow_path->GetEntryLabel());
3530       // rdx:rax <- sign-extended of rax
3531       __ cqo();
3532       // rax = quotient, rdx = remainder
3533       __ idivq(second_reg);
3534     }
3535     __ Bind(slow_path->GetExitLabel());
3536   }
3537 }
3538 
3539 void LocationsBuilderX86_64::VisitDiv(HDiv* div) {
3540   LocationSummary* locations =
3541       new (GetGraph()->GetArena()) LocationSummary(div, LocationSummary::kNoCall);
3542   switch (div->GetResultType()) {
3543     case Primitive::kPrimInt:
3544     case Primitive::kPrimLong: {
3545       locations->SetInAt(0, Location::RegisterLocation(RAX));
3546       locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
3547       locations->SetOut(Location::SameAsFirstInput());
3548       // Intel uses edx:eax as the dividend.
3549       locations->AddTemp(Location::RegisterLocation(RDX));
3550       // We need to save the numerator while we tweak rax and rdx. As we are using imul in a way
3551       // which enforces results to be in RAX and RDX, things are simpler if we use RDX also as
3552       // output and request another temp.
3553       if (div->InputAt(1)->IsConstant()) {
3554         locations->AddTemp(Location::RequiresRegister());
3555       }
3556       break;
3557     }
3558 
3559     case Primitive::kPrimFloat:
3560     case Primitive::kPrimDouble: {
3561       locations->SetInAt(0, Location::RequiresFpuRegister());
3562       locations->SetInAt(1, Location::Any());
3563       locations->SetOut(Location::SameAsFirstInput());
3564       break;
3565     }
3566 
3567     default:
3568       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3569   }
3570 }
3571 
3572 void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) {
3573   LocationSummary* locations = div->GetLocations();
3574   Location first = locations->InAt(0);
3575   Location second = locations->InAt(1);
3576   DCHECK(first.Equals(locations->Out()));
3577 
3578   Primitive::Type type = div->GetResultType();
3579   switch (type) {
3580     case Primitive::kPrimInt:
3581     case Primitive::kPrimLong: {
3582       GenerateDivRemIntegral(div);
3583       break;
3584     }
3585 
3586     case Primitive::kPrimFloat: {
3587       if (second.IsFpuRegister()) {
3588         __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3589       } else if (second.IsConstant()) {
3590         __ divss(first.AsFpuRegister<XmmRegister>(),
3591                  codegen_->LiteralFloatAddress(
3592                      second.GetConstant()->AsFloatConstant()->GetValue()));
3593       } else {
3594         DCHECK(second.IsStackSlot());
3595         __ divss(first.AsFpuRegister<XmmRegister>(),
3596                  Address(CpuRegister(RSP), second.GetStackIndex()));
3597       }
3598       break;
3599     }
3600 
3601     case Primitive::kPrimDouble: {
3602       if (second.IsFpuRegister()) {
3603         __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3604       } else if (second.IsConstant()) {
3605         __ divsd(first.AsFpuRegister<XmmRegister>(),
3606                  codegen_->LiteralDoubleAddress(
3607                      second.GetConstant()->AsDoubleConstant()->GetValue()));
3608       } else {
3609         DCHECK(second.IsDoubleStackSlot());
3610         __ divsd(first.AsFpuRegister<XmmRegister>(),
3611                  Address(CpuRegister(RSP), second.GetStackIndex()));
3612       }
3613       break;
3614     }
3615 
3616     default:
3617       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3618   }
3619 }
3620 
3621 void LocationsBuilderX86_64::VisitRem(HRem* rem) {
3622   Primitive::Type type = rem->GetResultType();
3623   LocationSummary* locations =
3624     new (GetGraph()->GetArena()) LocationSummary(rem, LocationSummary::kNoCall);
3625 
3626   switch (type) {
3627     case Primitive::kPrimInt:
3628     case Primitive::kPrimLong: {
3629       locations->SetInAt(0, Location::RegisterLocation(RAX));
3630       locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
3631       // Intel uses rdx:rax as the dividend and puts the remainder in rdx
3632       locations->SetOut(Location::RegisterLocation(RDX));
3633       // We need to save the numerator while we tweak eax and edx. As we are using imul in a way
3634       // which enforces results to be in RAX and RDX, things are simpler if we use EAX also as
3635       // output and request another temp.
3636       if (rem->InputAt(1)->IsConstant()) {
3637         locations->AddTemp(Location::RequiresRegister());
3638       }
3639       break;
3640     }
3641 
3642     case Primitive::kPrimFloat:
3643     case Primitive::kPrimDouble: {
3644       locations->SetInAt(0, Location::Any());
3645       locations->SetInAt(1, Location::Any());
3646       locations->SetOut(Location::RequiresFpuRegister());
3647       locations->AddTemp(Location::RegisterLocation(RAX));
3648       break;
3649     }
3650 
3651     default:
3652       LOG(FATAL) << "Unexpected rem type " << type;
3653   }
3654 }
3655 
3656 void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) {
3657   Primitive::Type type = rem->GetResultType();
3658   switch (type) {
3659     case Primitive::kPrimInt:
3660     case Primitive::kPrimLong: {
3661       GenerateDivRemIntegral(rem);
3662       break;
3663     }
3664     case Primitive::kPrimFloat:
3665     case Primitive::kPrimDouble: {
3666       GenerateRemFP(rem);
3667       break;
3668     }
3669     default:
3670       LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
3671   }
3672 }
3673 
3674 void LocationsBuilderX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
3675   LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
3676       ? LocationSummary::kCallOnSlowPath
3677       : LocationSummary::kNoCall;
3678   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
3679   locations->SetInAt(0, Location::Any());
3680   if (instruction->HasUses()) {
3681     locations->SetOut(Location::SameAsFirstInput());
3682   }
3683 }
3684 
3685 void InstructionCodeGeneratorX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
3686   SlowPathCode* slow_path =
3687       new (GetGraph()->GetArena()) DivZeroCheckSlowPathX86_64(instruction);
3688   codegen_->AddSlowPath(slow_path);
3689 
3690   LocationSummary* locations = instruction->GetLocations();
3691   Location value = locations->InAt(0);
3692 
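  // A register or memory divisor is tested at run time; a constant divisor is resolved
  // here: zero becomes an unconditional jump to the slow path, non-zero emits nothing.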
3693   switch (instruction->GetType()) {
3694     case Primitive::kPrimBoolean:
3695     case Primitive::kPrimByte:
3696     case Primitive::kPrimChar:
3697     case Primitive::kPrimShort:
3698     case Primitive::kPrimInt: {
3699       if (value.IsRegister()) {
3700         __ testl(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
3701         __ j(kEqual, slow_path->GetEntryLabel());
3702       } else if (value.IsStackSlot()) {
3703         __ cmpl(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
3704         __ j(kEqual, slow_path->GetEntryLabel());
3705       } else {
3706         DCHECK(value.IsConstant()) << value;
3707         if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
3708           __ jmp(slow_path->GetEntryLabel());
3709         }
3710       }
3711       break;
3712     }
3713     case Primitive::kPrimLong: {
3714       if (value.IsRegister()) {
3715         __ testq(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
3716         __ j(kEqual, slow_path->GetEntryLabel());
3717       } else if (value.IsDoubleStackSlot()) {
3718         __ cmpq(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
3719         __ j(kEqual, slow_path->GetEntryLabel());
3720       } else {
3721         DCHECK(value.IsConstant()) << value;
3722         if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
3723           __ jmp(slow_path->GetEntryLabel());
3724         }
3725       }
3726       break;
3727     }
3728     default:
3729       LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
3730   }
3731 }
3732 
3733 void LocationsBuilderX86_64::HandleShift(HBinaryOperation* op) {
3734   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
3735 
3736   LocationSummary* locations =
3737       new (GetGraph()->GetArena()) LocationSummary(op, LocationSummary::kNoCall);
3738 
3739   switch (op->GetResultType()) {
3740     case Primitive::kPrimInt:
3741     case Primitive::kPrimLong: {
3742       locations->SetInAt(0, Location::RequiresRegister());
3743       // The shift count needs to be in CL.
3744       locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, op->InputAt(1)));
3745       locations->SetOut(Location::SameAsFirstInput());
3746       break;
3747     }
3748     default:
3749       LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
3750   }
3751 }
3752 
3753 void InstructionCodeGeneratorX86_64::HandleShift(HBinaryOperation* op) {
3754   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
3755 
3756   LocationSummary* locations = op->GetLocations();
3757   CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
3758   Location second = locations->InAt(1);
3759 
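  // Java defines shift distances modulo 32 (int) or 64 (long). A count in CL is masked
  // by the hardware; constant counts are masked explicitly below with
  // kMaxIntShiftDistance / kMaxLongShiftDistance.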
3760   switch (op->GetResultType()) {
3761     case Primitive::kPrimInt: {
3762       if (second.IsRegister()) {
3763         CpuRegister second_reg = second.AsRegister<CpuRegister>();
3764         if (op->IsShl()) {
3765           __ shll(first_reg, second_reg);
3766         } else if (op->IsShr()) {
3767           __ sarl(first_reg, second_reg);
3768         } else {
3769           __ shrl(first_reg, second_reg);
3770         }
3771       } else {
3772         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
3773         if (op->IsShl()) {
3774           __ shll(first_reg, imm);
3775         } else if (op->IsShr()) {
3776           __ sarl(first_reg, imm);
3777         } else {
3778           __ shrl(first_reg, imm);
3779         }
3780       }
3781       break;
3782     }
3783     case Primitive::kPrimLong: {
3784       if (second.IsRegister()) {
3785         CpuRegister second_reg = second.AsRegister<CpuRegister>();
3786         if (op->IsShl()) {
3787           __ shlq(first_reg, second_reg);
3788         } else if (op->IsShr()) {
3789           __ sarq(first_reg, second_reg);
3790         } else {
3791           __ shrq(first_reg, second_reg);
3792         }
3793       } else {
3794         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
3795         if (op->IsShl()) {
3796           __ shlq(first_reg, imm);
3797         } else if (op->IsShr()) {
3798           __ sarq(first_reg, imm);
3799         } else {
3800           __ shrq(first_reg, imm);
3801         }
3802       }
3803       break;
3804     }
3805     default:
3806       LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
3807       UNREACHABLE();
3808   }
3809 }
3810 
3811 void LocationsBuilderX86_64::VisitRor(HRor* ror) {
3812   LocationSummary* locations =
3813       new (GetGraph()->GetArena()) LocationSummary(ror, LocationSummary::kNoCall);
3814 
3815   switch (ror->GetResultType()) {
3816     case Primitive::kPrimInt:
3817     case Primitive::kPrimLong: {
3818       locations->SetInAt(0, Location::RequiresRegister());
3819       // The shift count needs to be in CL (unless it is a constant).
3820       locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, ror->InputAt(1)));
3821       locations->SetOut(Location::SameAsFirstInput());
3822       break;
3823     }
3824     default:
3825       LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
3826       UNREACHABLE();
3827   }
3828 }
3829 
VisitRor(HRor * ror)3830 void InstructionCodeGeneratorX86_64::VisitRor(HRor* ror) {
3831   LocationSummary* locations = ror->GetLocations();
3832   CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
3833   Location second = locations->InAt(1);
3834 
3835   switch (ror->GetResultType()) {
3836     case Primitive::kPrimInt:
3837       if (second.IsRegister()) {
3838         CpuRegister second_reg = second.AsRegister<CpuRegister>();
3839         __ rorl(first_reg, second_reg);
3840       } else {
3841         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
3842         __ rorl(first_reg, imm);
3843       }
3844       break;
3845     case Primitive::kPrimLong:
3846       if (second.IsRegister()) {
3847         CpuRegister second_reg = second.AsRegister<CpuRegister>();
3848         __ rorq(first_reg, second_reg);
3849       } else {
3850         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
3851         __ rorq(first_reg, imm);
3852       }
3853       break;
3854     default:
3855       LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
3856       UNREACHABLE();
3857   }
3858 }
3859 
VisitShl(HShl * shl)3860 void LocationsBuilderX86_64::VisitShl(HShl* shl) {
3861   HandleShift(shl);
3862 }
3863 
VisitShl(HShl * shl)3864 void InstructionCodeGeneratorX86_64::VisitShl(HShl* shl) {
3865   HandleShift(shl);
3866 }
3867 
VisitShr(HShr * shr)3868 void LocationsBuilderX86_64::VisitShr(HShr* shr) {
3869   HandleShift(shr);
3870 }
3871 
VisitShr(HShr * shr)3872 void InstructionCodeGeneratorX86_64::VisitShr(HShr* shr) {
3873   HandleShift(shr);
3874 }
3875 
VisitUShr(HUShr * ushr)3876 void LocationsBuilderX86_64::VisitUShr(HUShr* ushr) {
3877   HandleShift(ushr);
3878 }
3879 
VisitUShr(HUShr * ushr)3880 void InstructionCodeGeneratorX86_64::VisitUShr(HUShr* ushr) {
3881   HandleShift(ushr);
3882 }
3883 
VisitNewInstance(HNewInstance * instruction)3884 void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
3885   LocationSummary* locations =
3886       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
3887   InvokeRuntimeCallingConvention calling_convention;
3888   if (instruction->IsStringAlloc()) {
3889     locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
3890   } else {
3891     locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
3892     locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
3893   }
3894   locations->SetOut(Location::RegisterLocation(RAX));
3895 }
3896 
VisitNewInstance(HNewInstance * instruction)3897 void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) {
3898   // Note: if heap poisoning is enabled, the entry point takes care
3899   // of poisoning the reference.
3900   if (instruction->IsStringAlloc()) {
3901     // String is allocated through StringFactory. Call NewEmptyString entry point.
3902     CpuRegister temp = instruction->GetLocations()->GetTemp(0).AsRegister<CpuRegister>();
3903     MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64WordSize);
3904     __ gs()->movq(temp, Address::Absolute(QUICK_ENTRY_POINT(pNewEmptyString), /* no_rip */ true));
3905     __ call(Address(temp, code_offset.SizeValue()));
3906     codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
3907   } else {
3908     codegen_->InvokeRuntime(instruction->GetEntrypoint(),
3909                             instruction,
3910                             instruction->GetDexPc(),
3911                             nullptr);
3912     CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
3913     DCHECK(!codegen_->IsLeafMethod());
3914   }
3915 }
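
// Sketch of the string path above (not authoritative): the pNewEmptyString slot in the
// thread-local entrypoint table is expected to hold the resolved StringFactory method,
// so the gs-relative movq loads an ArtMethod* into `temp` and the call goes through its
// entry-point-from-quick-compiled-code field; the new empty java.lang.String is then
// returned in RAX like any other runtime allocation.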
3916 
VisitNewArray(HNewArray * instruction)3917 void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) {
3918   LocationSummary* locations =
3919       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
3920   InvokeRuntimeCallingConvention calling_convention;
3921   locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
3922   locations->SetOut(Location::RegisterLocation(RAX));
3923   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
3924   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
3925 }
3926 
VisitNewArray(HNewArray * instruction)3927 void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) {
3928   InvokeRuntimeCallingConvention calling_convention;
3929   codegen_->Load64BitValue(CpuRegister(calling_convention.GetRegisterAt(0)),
3930                            instruction->GetTypeIndex());
3931   // Note: if heap poisoning is enabled, the entry point takes care
3932   // of poisoning the reference.
3933   codegen_->InvokeRuntime(instruction->GetEntrypoint(),
3934                           instruction,
3935                           instruction->GetDexPc(),
3936                           nullptr);
3937   CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>();
3938 
3939   DCHECK(!codegen_->IsLeafMethod());
3940 }
3941 
VisitParameterValue(HParameterValue * instruction)3942 void LocationsBuilderX86_64::VisitParameterValue(HParameterValue* instruction) {
3943   LocationSummary* locations =
3944       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
3945   Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
3946   if (location.IsStackSlot()) {
3947     location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
3948   } else if (location.IsDoubleStackSlot()) {
3949     location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
3950   }
3951   locations->SetOut(location);
3952 }
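
// Sketch of the adjustment above: GetNextLocation() describes stack-passed arguments
// relative to the stack pointer at the call site (the caller's outgoing-argument area).
// Inside the callee the frame has already been pushed, so the same slot now lives at
//
//   [RSP + original_offset + GetFrameSize()]
//
// which is exactly what the StackSlot/DoubleStackSlot rewrite computes.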
3953 
VisitParameterValue(HParameterValue * instruction ATTRIBUTE_UNUSED)3954 void InstructionCodeGeneratorX86_64::VisitParameterValue(
3955     HParameterValue* instruction ATTRIBUTE_UNUSED) {
3956   // Nothing to do, the parameter is already at its location.
3957 }
3958 
VisitCurrentMethod(HCurrentMethod * instruction)3959 void LocationsBuilderX86_64::VisitCurrentMethod(HCurrentMethod* instruction) {
3960   LocationSummary* locations =
3961       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
3962   locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument));
3963 }
3964 
VisitCurrentMethod(HCurrentMethod * instruction ATTRIBUTE_UNUSED)3965 void InstructionCodeGeneratorX86_64::VisitCurrentMethod(
3966     HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
3967   // Nothing to do, the method is already at its location.
3968 }
3969 
VisitClassTableGet(HClassTableGet * instruction)3970 void LocationsBuilderX86_64::VisitClassTableGet(HClassTableGet* instruction) {
3971   LocationSummary* locations =
3972       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
3973   locations->SetInAt(0, Location::RequiresRegister());
3974   locations->SetOut(Location::RequiresRegister());
3975 }
3976 
VisitClassTableGet(HClassTableGet * instruction)3977 void InstructionCodeGeneratorX86_64::VisitClassTableGet(HClassTableGet* instruction) {
3978   LocationSummary* locations = instruction->GetLocations();
3979   uint32_t method_offset = 0;
3980   if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
3981     method_offset = mirror::Class::EmbeddedVTableEntryOffset(
3982         instruction->GetIndex(), kX86_64PointerSize).SizeValue();
3983   } else {
3984     method_offset = mirror::Class::EmbeddedImTableEntryOffset(
3985         instruction->GetIndex() % mirror::Class::kImtSize, kX86_64PointerSize).Uint32Value();
3986   }
3987   __ movq(locations->Out().AsRegister<CpuRegister>(),
3988           Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset));
3989 }
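
// Sketch of what the movq above reads, assuming the class object is in InAt(0): for
// kVTable the ArtMethod* is an embedded vtable entry stored directly inside the
// mirror::Class at EmbeddedVTableEntryOffset(index); otherwise the index is first
// reduced modulo mirror::Class::kImtSize and the entry comes from the embedded IMT.
// Either way a single load from the class object yields the method pointer.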
3990 
VisitNot(HNot * not_)3991 void LocationsBuilderX86_64::VisitNot(HNot* not_) {
3992   LocationSummary* locations =
3993       new (GetGraph()->GetArena()) LocationSummary(not_, LocationSummary::kNoCall);
3994   locations->SetInAt(0, Location::RequiresRegister());
3995   locations->SetOut(Location::SameAsFirstInput());
3996 }
3997 
VisitNot(HNot * not_)3998 void InstructionCodeGeneratorX86_64::VisitNot(HNot* not_) {
3999   LocationSummary* locations = not_->GetLocations();
4000   DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
4001             locations->Out().AsRegister<CpuRegister>().AsRegister());
4002   Location out = locations->Out();
4003   switch (not_->GetResultType()) {
4004     case Primitive::kPrimInt:
4005       __ notl(out.AsRegister<CpuRegister>());
4006       break;
4007 
4008     case Primitive::kPrimLong:
4009       __ notq(out.AsRegister<CpuRegister>());
4010       break;
4011 
4012     default:
4013       LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
4014   }
4015 }
4016 
VisitBooleanNot(HBooleanNot * bool_not)4017 void LocationsBuilderX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
4018   LocationSummary* locations =
4019       new (GetGraph()->GetArena()) LocationSummary(bool_not, LocationSummary::kNoCall);
4020   locations->SetInAt(0, Location::RequiresRegister());
4021   locations->SetOut(Location::SameAsFirstInput());
4022 }
4023 
VisitBooleanNot(HBooleanNot * bool_not)4024 void InstructionCodeGeneratorX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
4025   LocationSummary* locations = bool_not->GetLocations();
4026   DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
4027             locations->Out().AsRegister<CpuRegister>().AsRegister());
4028   Location out = locations->Out();
4029   __ xorl(out.AsRegister<CpuRegister>(), Immediate(1));
4030 }
4031 
VisitPhi(HPhi * instruction)4032 void LocationsBuilderX86_64::VisitPhi(HPhi* instruction) {
4033   LocationSummary* locations =
4034       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
4035   for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
4036     locations->SetInAt(i, Location::Any());
4037   }
4038   locations->SetOut(Location::Any());
4039 }
4040 
VisitPhi(HPhi * instruction ATTRIBUTE_UNUSED)4041 void InstructionCodeGeneratorX86_64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
4042   LOG(FATAL) << "Unimplemented";
4043 }
4044 
GenerateMemoryBarrier(MemBarrierKind kind)4045 void CodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) {
4046   /*
4047    * According to the JSR-133 Cookbook, on x86 only StoreLoad/AnyAny barriers need a memory fence.
4048    * All other barriers (LoadAny, AnyStore, StoreStore) are no-ops due to the x86-64 memory model.
4049    * For those cases, all we need to ensure is that a compiler scheduling barrier is in place.
4050    */
4051   switch (kind) {
4052     case MemBarrierKind::kAnyAny: {
4053       MemoryFence();
4054       break;
4055     }
4056     case MemBarrierKind::kAnyStore:
4057     case MemBarrierKind::kLoadAny:
4058     case MemBarrierKind::kStoreStore: {
4059       // nop
4060       break;
4061     }
4062     default:
4063       LOG(FATAL) << "Unexpected memory barrier " << kind;
4064   }
4065 }
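
// Rough mapping of the cases above (a sketch; the exact instruction chosen by
// MemoryFence() depends on the codegen configuration):
//
//   kAnyAny                        -> mfence, or an equivalent locked read-modify-write
//                                     on the stack when that cheaper form is preferred
//   kAnyStore/kLoadAny/kStoreStore -> no instruction at all; x86-64 TSO already provides
//                                     the required ordering, the switch only has to avoid
//                                     compiler reordering.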
4066 
HandleFieldGet(HInstruction * instruction)4067 void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) {
4068   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
4069 
4070   bool object_field_get_with_read_barrier =
4071       kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
4072   LocationSummary* locations =
4073       new (GetGraph()->GetArena()) LocationSummary(instruction,
4074                                                    object_field_get_with_read_barrier ?
4075                                                        LocationSummary::kCallOnSlowPath :
4076                                                        LocationSummary::kNoCall);
4077   locations->SetInAt(0, Location::RequiresRegister());
4078   if (Primitive::IsFloatingPointType(instruction->GetType())) {
4079     locations->SetOut(Location::RequiresFpuRegister());
4080   } else {
4081     // The output overlaps for an object field get when read barriers
4082     // are enabled: we do not want the move to overwrite the object's
4083     // location, as we need it to emit the read barrier.
4084     locations->SetOut(
4085         Location::RequiresRegister(),
4086         object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
4087   }
4088   if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
4089     // We need a temporary register for the read barrier marking slow
4090     // path in CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier.
4091     locations->AddTemp(Location::RequiresRegister());
4092   }
4093 }
4094 
HandleFieldGet(HInstruction * instruction,const FieldInfo & field_info)4095 void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
4096                                                     const FieldInfo& field_info) {
4097   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
4098 
4099   LocationSummary* locations = instruction->GetLocations();
4100   Location base_loc = locations->InAt(0);
4101   CpuRegister base = base_loc.AsRegister<CpuRegister>();
4102   Location out = locations->Out();
4103   bool is_volatile = field_info.IsVolatile();
4104   Primitive::Type field_type = field_info.GetFieldType();
4105   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
4106 
4107   switch (field_type) {
4108     case Primitive::kPrimBoolean: {
4109       __ movzxb(out.AsRegister<CpuRegister>(), Address(base, offset));
4110       break;
4111     }
4112 
4113     case Primitive::kPrimByte: {
4114       __ movsxb(out.AsRegister<CpuRegister>(), Address(base, offset));
4115       break;
4116     }
4117 
4118     case Primitive::kPrimShort: {
4119       __ movsxw(out.AsRegister<CpuRegister>(), Address(base, offset));
4120       break;
4121     }
4122 
4123     case Primitive::kPrimChar: {
4124       __ movzxw(out.AsRegister<CpuRegister>(), Address(base, offset));
4125       break;
4126     }
4127 
4128     case Primitive::kPrimInt: {
4129       __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
4130       break;
4131     }
4132 
4133     case Primitive::kPrimNot: {
4134       // /* HeapReference<Object> */ out = *(base + offset)
4135       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
4136         Location temp_loc = locations->GetTemp(0);
4137         // Note that a potential implicit null check is handled in this
4138         // CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call.
4139         codegen_->GenerateFieldLoadWithBakerReadBarrier(
4140             instruction, out, base, offset, temp_loc, /* needs_null_check */ true);
4141         if (is_volatile) {
4142           codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4143         }
4144       } else {
4145         __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
4146         codegen_->MaybeRecordImplicitNullCheck(instruction);
4147         if (is_volatile) {
4148           codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4149         }
4150         // If read barriers are enabled, emit read barriers other than
4151         // Baker's using a slow path (and also unpoison the loaded
4152         // reference, if heap poisoning is enabled).
4153         codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
4154       }
4155       break;
4156     }
4157 
4158     case Primitive::kPrimLong: {
4159       __ movq(out.AsRegister<CpuRegister>(), Address(base, offset));
4160       break;
4161     }
4162 
4163     case Primitive::kPrimFloat: {
4164       __ movss(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
4165       break;
4166     }
4167 
4168     case Primitive::kPrimDouble: {
4169       __ movsd(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
4170       break;
4171     }
4172 
4173     case Primitive::kPrimVoid:
4174       LOG(FATAL) << "Unreachable type " << field_type;
4175       UNREACHABLE();
4176   }
4177 
4178   if (field_type == Primitive::kPrimNot) {
4179     // Potential implicit null checks, in the case of reference
4180     // fields, are handled in the previous switch statement.
4181   } else {
4182     codegen_->MaybeRecordImplicitNullCheck(instruction);
4183   }
4184 
4185   if (is_volatile) {
4186     if (field_type == Primitive::kPrimNot) {
4187       // Memory barriers, in the case of references, are also handled
4188       // in the previous switch statement.
4189     } else {
4190       codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4191     }
4192   }
4193 }
4194 
HandleFieldSet(HInstruction * instruction,const FieldInfo & field_info)4195 void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction,
4196                                             const FieldInfo& field_info) {
4197   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
4198 
4199   LocationSummary* locations =
4200       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
4201   Primitive::Type field_type = field_info.GetFieldType();
4202   bool is_volatile = field_info.IsVolatile();
4203   bool needs_write_barrier =
4204       CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
4205 
4206   locations->SetInAt(0, Location::RequiresRegister());
4207   if (Primitive::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
4208     if (is_volatile) {
4209       // In order to satisfy the semantics of volatile, this must be a single instruction store.
4210       locations->SetInAt(1, Location::FpuRegisterOrInt32Constant(instruction->InputAt(1)));
4211     } else {
4212       locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
4213     }
4214   } else {
4215     if (is_volatile) {
4216       // In order to satisfy the semantics of volatile, this must be a single instruction store.
4217       locations->SetInAt(1, Location::RegisterOrInt32Constant(instruction->InputAt(1)));
4218     } else {
4219       locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
4220     }
4221   }
4222   if (needs_write_barrier) {
4223     // Temporary registers for the write barrier.
4224     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
4225     locations->AddTemp(Location::RequiresRegister());
4226   } else if (kPoisonHeapReferences && field_type == Primitive::kPrimNot) {
4227     // Temporary register for the reference poisoning.
4228     locations->AddTemp(Location::RequiresRegister());
4229   }
4230 }
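
// Why volatile stores are restricted to Int32 constants above (a sketch): a long/double
// constant that does not fit in a sign-extended 32-bit immediate is otherwise written by
// MoveInt64ToAddress as two 32-bit movl stores (low half, then high half), which is not a
// single atomic access and would break volatile semantics. Limiting volatile constants to
// int32-representable values guarantees a single movq/movl store on the code path below.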
4231 
HandleFieldSet(HInstruction * instruction,const FieldInfo & field_info,bool value_can_be_null)4232 void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
4233                                                     const FieldInfo& field_info,
4234                                                     bool value_can_be_null) {
4235   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
4236 
4237   LocationSummary* locations = instruction->GetLocations();
4238   CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>();
4239   Location value = locations->InAt(1);
4240   bool is_volatile = field_info.IsVolatile();
4241   Primitive::Type field_type = field_info.GetFieldType();
4242   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
4243 
4244   if (is_volatile) {
4245     codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
4246   }
4247 
4248   bool maybe_record_implicit_null_check_done = false;
4249 
4250   switch (field_type) {
4251     case Primitive::kPrimBoolean:
4252     case Primitive::kPrimByte: {
4253       if (value.IsConstant()) {
4254         int8_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
4255         __ movb(Address(base, offset), Immediate(v));
4256       } else {
4257         __ movb(Address(base, offset), value.AsRegister<CpuRegister>());
4258       }
4259       break;
4260     }
4261 
4262     case Primitive::kPrimShort:
4263     case Primitive::kPrimChar: {
4264       if (value.IsConstant()) {
4265         int16_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
4266         __ movw(Address(base, offset), Immediate(v));
4267       } else {
4268         __ movw(Address(base, offset), value.AsRegister<CpuRegister>());
4269       }
4270       break;
4271     }
4272 
4273     case Primitive::kPrimInt:
4274     case Primitive::kPrimNot: {
4275       if (value.IsConstant()) {
4276         int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
4277         // `field_type == Primitive::kPrimNot` implies `v == 0`.
4278         DCHECK((field_type != Primitive::kPrimNot) || (v == 0));
4279         // Note: if heap poisoning is enabled, no need to poison
4280         // (negate) `v` if it is a reference, as it would be null.
4281         __ movl(Address(base, offset), Immediate(v));
4282       } else {
4283         if (kPoisonHeapReferences && field_type == Primitive::kPrimNot) {
4284           CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
4285           __ movl(temp, value.AsRegister<CpuRegister>());
4286           __ PoisonHeapReference(temp);
4287           __ movl(Address(base, offset), temp);
4288         } else {
4289           __ movl(Address(base, offset), value.AsRegister<CpuRegister>());
4290         }
4291       }
4292       break;
4293     }
4294 
4295     case Primitive::kPrimLong: {
4296       if (value.IsConstant()) {
4297         int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
4298         codegen_->MoveInt64ToAddress(Address(base, offset),
4299                                      Address(base, offset + sizeof(int32_t)),
4300                                      v,
4301                                      instruction);
4302         maybe_record_implicit_null_check_done = true;
4303       } else {
4304         __ movq(Address(base, offset), value.AsRegister<CpuRegister>());
4305       }
4306       break;
4307     }
4308 
4309     case Primitive::kPrimFloat: {
4310       if (value.IsConstant()) {
4311         int32_t v =
4312             bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
4313         __ movl(Address(base, offset), Immediate(v));
4314       } else {
4315         __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>());
4316       }
4317       break;
4318     }
4319 
4320     case Primitive::kPrimDouble: {
4321       if (value.IsConstant()) {
4322         int64_t v =
4323             bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
4324         codegen_->MoveInt64ToAddress(Address(base, offset),
4325                                      Address(base, offset + sizeof(int32_t)),
4326                                      v,
4327                                      instruction);
4328         maybe_record_implicit_null_check_done = true;
4329       } else {
4330         __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>());
4331       }
4332       break;
4333     }
4334 
4335     case Primitive::kPrimVoid:
4336       LOG(FATAL) << "Unreachable type " << field_type;
4337       UNREACHABLE();
4338   }
4339 
4340   if (!maybe_record_implicit_null_check_done) {
4341     codegen_->MaybeRecordImplicitNullCheck(instruction);
4342   }
4343 
4344   if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
4345     CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
4346     CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
4347     codegen_->MarkGCCard(temp, card, base, value.AsRegister<CpuRegister>(), value_can_be_null);
4348   }
4349 
4350   if (is_volatile) {
4351     codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
4352   }
4353 }
4354 
VisitInstanceFieldSet(HInstanceFieldSet * instruction)4355 void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
4356   HandleFieldSet(instruction, instruction->GetFieldInfo());
4357 }
4358 
VisitInstanceFieldSet(HInstanceFieldSet * instruction)4359 void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
4360   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
4361 }
4362 
VisitInstanceFieldGet(HInstanceFieldGet * instruction)4363 void LocationsBuilderX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
4364   HandleFieldGet(instruction);
4365 }
4366 
VisitInstanceFieldGet(HInstanceFieldGet * instruction)4367 void InstructionCodeGeneratorX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
4368   HandleFieldGet(instruction, instruction->GetFieldInfo());
4369 }
4370 
VisitStaticFieldGet(HStaticFieldGet * instruction)4371 void LocationsBuilderX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
4372   HandleFieldGet(instruction);
4373 }
4374 
VisitStaticFieldGet(HStaticFieldGet * instruction)4375 void InstructionCodeGeneratorX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
4376   HandleFieldGet(instruction, instruction->GetFieldInfo());
4377 }
4378 
VisitStaticFieldSet(HStaticFieldSet * instruction)4379 void LocationsBuilderX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
4380   HandleFieldSet(instruction, instruction->GetFieldInfo());
4381 }
4382 
VisitStaticFieldSet(HStaticFieldSet * instruction)4383 void InstructionCodeGeneratorX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
4384   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
4385 }
4386 
VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet * instruction)4387 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldGet(
4388     HUnresolvedInstanceFieldGet* instruction) {
4389   FieldAccessCallingConventionX86_64 calling_convention;
4390   codegen_->CreateUnresolvedFieldLocationSummary(
4391       instruction, instruction->GetFieldType(), calling_convention);
4392 }
4393 
VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet * instruction)4394 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldGet(
4395     HUnresolvedInstanceFieldGet* instruction) {
4396   FieldAccessCallingConventionX86_64 calling_convention;
4397   codegen_->GenerateUnresolvedFieldAccess(instruction,
4398                                           instruction->GetFieldType(),
4399                                           instruction->GetFieldIndex(),
4400                                           instruction->GetDexPc(),
4401                                           calling_convention);
4402 }
4403 
VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet * instruction)4404 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldSet(
4405     HUnresolvedInstanceFieldSet* instruction) {
4406   FieldAccessCallingConventionX86_64 calling_convention;
4407   codegen_->CreateUnresolvedFieldLocationSummary(
4408       instruction, instruction->GetFieldType(), calling_convention);
4409 }
4410 
VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet * instruction)4411 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldSet(
4412     HUnresolvedInstanceFieldSet* instruction) {
4413   FieldAccessCallingConventionX86_64 calling_convention;
4414   codegen_->GenerateUnresolvedFieldAccess(instruction,
4415                                           instruction->GetFieldType(),
4416                                           instruction->GetFieldIndex(),
4417                                           instruction->GetDexPc(),
4418                                           calling_convention);
4419 }
4420 
VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet * instruction)4421 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldGet(
4422     HUnresolvedStaticFieldGet* instruction) {
4423   FieldAccessCallingConventionX86_64 calling_convention;
4424   codegen_->CreateUnresolvedFieldLocationSummary(
4425       instruction, instruction->GetFieldType(), calling_convention);
4426 }
4427 
VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet * instruction)4428 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldGet(
4429     HUnresolvedStaticFieldGet* instruction) {
4430   FieldAccessCallingConventionX86_64 calling_convention;
4431   codegen_->GenerateUnresolvedFieldAccess(instruction,
4432                                           instruction->GetFieldType(),
4433                                           instruction->GetFieldIndex(),
4434                                           instruction->GetDexPc(),
4435                                           calling_convention);
4436 }
4437 
VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet * instruction)4438 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldSet(
4439     HUnresolvedStaticFieldSet* instruction) {
4440   FieldAccessCallingConventionX86_64 calling_convention;
4441   codegen_->CreateUnresolvedFieldLocationSummary(
4442       instruction, instruction->GetFieldType(), calling_convention);
4443 }
4444 
VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet * instruction)4445 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldSet(
4446     HUnresolvedStaticFieldSet* instruction) {
4447   FieldAccessCallingConventionX86_64 calling_convention;
4448   codegen_->GenerateUnresolvedFieldAccess(instruction,
4449                                           instruction->GetFieldType(),
4450                                           instruction->GetFieldIndex(),
4451                                           instruction->GetDexPc(),
4452                                           calling_convention);
4453 }
4454 
VisitNullCheck(HNullCheck * instruction)4455 void LocationsBuilderX86_64::VisitNullCheck(HNullCheck* instruction) {
4456   LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
4457       ? LocationSummary::kCallOnSlowPath
4458       : LocationSummary::kNoCall;
4459   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
4460   Location loc = codegen_->IsImplicitNullCheckAllowed(instruction)
4461       ? Location::RequiresRegister()
4462       : Location::Any();
4463   locations->SetInAt(0, loc);
4464   if (instruction->HasUses()) {
4465     locations->SetOut(Location::SameAsFirstInput());
4466   }
4467 }
4468 
GenerateImplicitNullCheck(HNullCheck * instruction)4469 void CodeGeneratorX86_64::GenerateImplicitNullCheck(HNullCheck* instruction) {
4470   if (CanMoveNullCheckToUser(instruction)) {
4471     return;
4472   }
4473   LocationSummary* locations = instruction->GetLocations();
4474   Location obj = locations->InAt(0);
4475 
4476   __ testl(CpuRegister(RAX), Address(obj.AsRegister<CpuRegister>(), 0));
4477   RecordPcInfo(instruction, instruction->GetDexPc());
4478 }
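
// Sketch of the fast path above: the probe is a single load-and-discard, roughly
//
//   testl %eax, (obj)    // faults if obj == null
//
// A null `obj` makes the load fault near address zero; the runtime's SIGSEGV fault
// handler uses the stack map recorded by RecordPcInfo for this dex pc to turn the fault
// into a NullPointerException, so the explicit compare-and-branch of
// GenerateExplicitNullCheck is only needed when implicit checks are not allowed.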
4479 
GenerateExplicitNullCheck(HNullCheck * instruction)4480 void CodeGeneratorX86_64::GenerateExplicitNullCheck(HNullCheck* instruction) {
4481   SlowPathCode* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathX86_64(instruction);
4482   AddSlowPath(slow_path);
4483 
4484   LocationSummary* locations = instruction->GetLocations();
4485   Location obj = locations->InAt(0);
4486 
4487   if (obj.IsRegister()) {
4488     __ testl(obj.AsRegister<CpuRegister>(), obj.AsRegister<CpuRegister>());
4489   } else if (obj.IsStackSlot()) {
4490     __ cmpl(Address(CpuRegister(RSP), obj.GetStackIndex()), Immediate(0));
4491   } else {
4492     DCHECK(obj.IsConstant()) << obj;
4493     DCHECK(obj.GetConstant()->IsNullConstant());
4494     __ jmp(slow_path->GetEntryLabel());
4495     return;
4496   }
4497   __ j(kEqual, slow_path->GetEntryLabel());
4498 }
4499 
VisitNullCheck(HNullCheck * instruction)4500 void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) {
4501   codegen_->GenerateNullCheck(instruction);
4502 }
4503 
VisitArrayGet(HArrayGet * instruction)4504 void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) {
4505   bool object_array_get_with_read_barrier =
4506       kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
4507   LocationSummary* locations =
4508       new (GetGraph()->GetArena()) LocationSummary(instruction,
4509                                                    object_array_get_with_read_barrier ?
4510                                                        LocationSummary::kCallOnSlowPath :
4511                                                        LocationSummary::kNoCall);
4512   locations->SetInAt(0, Location::RequiresRegister());
4513   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
4514   if (Primitive::IsFloatingPointType(instruction->GetType())) {
4515     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4516   } else {
4517     // The output overlaps for an object array get when read barriers
4518     // are enabled: we do not want the move to overwrite the array's
4519     // location, as we need it to emit the read barrier.
4520     locations->SetOut(
4521         Location::RequiresRegister(),
4522         object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
4523   }
4524   // We need a temporary register for the read barrier marking slow
4525   // path in CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier.
4526   if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
4527     locations->AddTemp(Location::RequiresRegister());
4528   }
4529 }
4530 
VisitArrayGet(HArrayGet * instruction)4531 void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
4532   LocationSummary* locations = instruction->GetLocations();
4533   Location obj_loc = locations->InAt(0);
4534   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
4535   Location index = locations->InAt(1);
4536   Location out_loc = locations->Out();
4537 
4538   Primitive::Type type = instruction->GetType();
4539   switch (type) {
4540     case Primitive::kPrimBoolean: {
4541       uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
4542       CpuRegister out = out_loc.AsRegister<CpuRegister>();
4543       if (index.IsConstant()) {
4544         __ movzxb(out, Address(obj,
4545             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset));
4546       } else {
4547         __ movzxb(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_1, data_offset));
4548       }
4549       break;
4550     }
4551 
4552     case Primitive::kPrimByte: {
4553       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value();
4554       CpuRegister out = out_loc.AsRegister<CpuRegister>();
4555       if (index.IsConstant()) {
4556         __ movsxb(out, Address(obj,
4557             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset));
4558       } else {
4559         __ movsxb(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_1, data_offset));
4560       }
4561       break;
4562     }
4563 
4564     case Primitive::kPrimShort: {
4565       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value();
4566       CpuRegister out = out_loc.AsRegister<CpuRegister>();
4567       if (index.IsConstant()) {
4568         __ movsxw(out, Address(obj,
4569             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset));
4570       } else {
4571         __ movsxw(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_2, data_offset));
4572       }
4573       break;
4574     }
4575 
4576     case Primitive::kPrimChar: {
4577       uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
4578       CpuRegister out = out_loc.AsRegister<CpuRegister>();
4579       if (index.IsConstant()) {
4580         __ movzxw(out, Address(obj,
4581             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset));
4582       } else {
4583         __ movzxw(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_2, data_offset));
4584       }
4585       break;
4586     }
4587 
4588     case Primitive::kPrimInt: {
4589       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
4590       CpuRegister out = out_loc.AsRegister<CpuRegister>();
4591       if (index.IsConstant()) {
4592         __ movl(out, Address(obj,
4593             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset));
4594       } else {
4595         __ movl(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset));
4596       }
4597       break;
4598     }
4599 
4600     case Primitive::kPrimNot: {
4601       static_assert(
4602           sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
4603           "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
4604       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
4605       // /* HeapReference<Object> */ out =
4606       //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
4607       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
4608         Location temp = locations->GetTemp(0);
4609         // Note that a potential implicit null check is handled in this
4610         // CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call.
4611         codegen_->GenerateArrayLoadWithBakerReadBarrier(
4612             instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true);
4613       } else {
4614         CpuRegister out = out_loc.AsRegister<CpuRegister>();
4615         if (index.IsConstant()) {
4616           uint32_t offset =
4617               (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
4618           __ movl(out, Address(obj, offset));
4619           codegen_->MaybeRecordImplicitNullCheck(instruction);
4620           // If read barriers are enabled, emit read barriers other than
4621           // Baker's using a slow path (and also unpoison the loaded
4622           // reference, if heap poisoning is enabled).
4623           codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
4624         } else {
4625           __ movl(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset));
4626           codegen_->MaybeRecordImplicitNullCheck(instruction);
4627           // If read barriers are enabled, emit read barriers other than
4628           // Baker's using a slow path (and also unpoison the loaded
4629           // reference, if heap poisoning is enabled).
4630           codegen_->MaybeGenerateReadBarrierSlow(
4631               instruction, out_loc, out_loc, obj_loc, data_offset, index);
4632         }
4633       }
4634       break;
4635     }
4636 
4637     case Primitive::kPrimLong: {
4638       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
4639       CpuRegister out = out_loc.AsRegister<CpuRegister>();
4640       if (index.IsConstant()) {
4641         __ movq(out, Address(obj,
4642             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset));
4643       } else {
4644         __ movq(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset));
4645       }
4646       break;
4647     }
4648 
4649     case Primitive::kPrimFloat: {
4650       uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
4651       XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
4652       if (index.IsConstant()) {
4653         __ movss(out, Address(obj,
4654             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset));
4655       } else {
4656         __ movss(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset));
4657       }
4658       break;
4659     }
4660 
4661     case Primitive::kPrimDouble: {
4662       uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
4663       XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
4664       if (index.IsConstant()) {
4665         __ movsd(out, Address(obj,
4666             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset));
4667       } else {
4668         __ movsd(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset));
4669       }
4670       break;
4671     }
4672 
4673     case Primitive::kPrimVoid:
4674       LOG(FATAL) << "Unreachable type " << type;
4675       UNREACHABLE();
4676   }
4677 
4678   if (type == Primitive::kPrimNot) {
4679     // Potential implicit null checks, in the case of reference
4680     // arrays, are handled in the previous switch statement.
4681   } else {
4682     codegen_->MaybeRecordImplicitNullCheck(instruction);
4683   }
4684 }
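
// Addressing sketch for the element loads above (illustrative only): a constant index is
// folded into the displacement, while a register index uses scaled-index addressing, e.g.
// for an int[] with the array in `obj` and the index in `idx`:
//
//   movl data_offset + 4*constant_index(obj), out
//   movl data_offset(obj, idx, 4), out
//
// The scale (TIMES_1/2/4/8) always matches the component size, and data_offset skips the
// array header (class pointer, lock word and length).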
4685 
VisitArraySet(HArraySet * instruction)4686 void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) {
4687   Primitive::Type value_type = instruction->GetComponentType();
4688 
4689   bool needs_write_barrier =
4690       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
4691   bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
4692   bool object_array_set_with_read_barrier =
4693       kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot);
4694 
4695   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
4696       instruction,
4697       (may_need_runtime_call_for_type_check || object_array_set_with_read_barrier) ?
4698           LocationSummary::kCallOnSlowPath :
4699           LocationSummary::kNoCall);
4700 
4701   locations->SetInAt(0, Location::RequiresRegister());
4702   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
4703   if (Primitive::IsFloatingPointType(value_type)) {
4704     locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
4705   } else {
4706     locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
4707   }
4708 
4709   if (needs_write_barrier) {
4710     // Temporary registers for the write barrier.
4711 
4712     // This first temporary register is possibly used for heap
4713     // reference poisoning and/or read barrier emission too.
4714     locations->AddTemp(Location::RequiresRegister());
4715     locations->AddTemp(Location::RequiresRegister());
4716   }
4717 }
4718 
VisitArraySet(HArraySet * instruction)4719 void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
4720   LocationSummary* locations = instruction->GetLocations();
4721   Location array_loc = locations->InAt(0);
4722   CpuRegister array = array_loc.AsRegister<CpuRegister>();
4723   Location index = locations->InAt(1);
4724   Location value = locations->InAt(2);
4725   Primitive::Type value_type = instruction->GetComponentType();
4726   bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
4727   bool needs_write_barrier =
4728       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
4729   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
4730   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
4731   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
4732 
4733   switch (value_type) {
4734     case Primitive::kPrimBoolean:
4735     case Primitive::kPrimByte: {
4736       uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
4737       Address address = index.IsConstant()
4738           ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + offset)
4739           : Address(array, index.AsRegister<CpuRegister>(), TIMES_1, offset);
4740       if (value.IsRegister()) {
4741         __ movb(address, value.AsRegister<CpuRegister>());
4742       } else {
4743         __ movb(address, Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
4744       }
4745       codegen_->MaybeRecordImplicitNullCheck(instruction);
4746       break;
4747     }
4748 
4749     case Primitive::kPrimShort:
4750     case Primitive::kPrimChar: {
4751       uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
4752       Address address = index.IsConstant()
4753           ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + offset)
4754           : Address(array, index.AsRegister<CpuRegister>(), TIMES_2, offset);
4755       if (value.IsRegister()) {
4756         __ movw(address, value.AsRegister<CpuRegister>());
4757       } else {
4758         DCHECK(value.IsConstant()) << value;
4759         __ movw(address, Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
4760       }
4761       codegen_->MaybeRecordImplicitNullCheck(instruction);
4762       break;
4763     }
4764 
4765     case Primitive::kPrimNot: {
4766       uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
4767       Address address = index.IsConstant()
4768           ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
4769           : Address(array, index.AsRegister<CpuRegister>(), TIMES_4, offset);
4770 
4771       if (!value.IsRegister()) {
4772         // Just setting null.
4773         DCHECK(instruction->InputAt(2)->IsNullConstant());
4774         DCHECK(value.IsConstant()) << value;
4775         __ movl(address, Immediate(0));
4776         codegen_->MaybeRecordImplicitNullCheck(instruction);
4777         DCHECK(!needs_write_barrier);
4778         DCHECK(!may_need_runtime_call_for_type_check);
4779         break;
4780       }
4781 
4782       DCHECK(needs_write_barrier);
4783       CpuRegister register_value = value.AsRegister<CpuRegister>();
4784       NearLabel done, not_null, do_put;
4785       SlowPathCode* slow_path = nullptr;
4786       CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
4787       if (may_need_runtime_call_for_type_check) {
4788         slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathX86_64(instruction);
4789         codegen_->AddSlowPath(slow_path);
4790         if (instruction->GetValueCanBeNull()) {
4791           __ testl(register_value, register_value);
4792           __ j(kNotEqual, &not_null);
4793           __ movl(address, Immediate(0));
4794           codegen_->MaybeRecordImplicitNullCheck(instruction);
4795           __ jmp(&done);
4796           __ Bind(&not_null);
4797         }
4798 
4799         if (kEmitCompilerReadBarrier) {
4800           // When read barriers are enabled, the type checking
4801           // instrumentation requires two read barriers:
4802           //
4803           //   __ movl(temp2, temp);
4804           //   // /* HeapReference<Class> */ temp = temp->component_type_
4805           //   __ movl(temp, Address(temp, component_offset));
4806           //   codegen_->GenerateReadBarrierSlow(
4807           //       instruction, temp_loc, temp_loc, temp2_loc, component_offset);
4808           //
4809           //   // /* HeapReference<Class> */ temp2 = register_value->klass_
4810           //   __ movl(temp2, Address(register_value, class_offset));
4811           //   codegen_->GenerateReadBarrierSlow(
4812           //       instruction, temp2_loc, temp2_loc, value, class_offset, temp_loc);
4813           //
4814           //   __ cmpl(temp, temp2);
4815           //
4816           // However, the second read barrier may trash `temp`, as it
4817           // is a temporary register, and as such would not be saved
4818           // along with live registers before calling the runtime (nor
4819           // restored afterwards).  So in this case, we bail out and
4820           // delegate the work to the array set slow path.
4821           //
4822           // TODO: Extend the register allocator to support a new
4823           // "(locally) live temp" location so as to avoid always
4824           // going into the slow path when read barriers are enabled.
4825           __ jmp(slow_path->GetEntryLabel());
4826         } else {
4827           // /* HeapReference<Class> */ temp = array->klass_
4828           __ movl(temp, Address(array, class_offset));
4829           codegen_->MaybeRecordImplicitNullCheck(instruction);
4830           __ MaybeUnpoisonHeapReference(temp);
4831 
4832           // /* HeapReference<Class> */ temp = temp->component_type_
4833           __ movl(temp, Address(temp, component_offset));
4834           // If heap poisoning is enabled, no need to unpoison `temp`
4835           // nor the object reference in `register_value->klass`, as
4836           // we are comparing two poisoned references.
4837           __ cmpl(temp, Address(register_value, class_offset));
4838 
4839           if (instruction->StaticTypeOfArrayIsObjectArray()) {
4840             __ j(kEqual, &do_put);
4841             // If heap poisoning is enabled, the `temp` reference has
4842             // not been unpoisoned yet; unpoison it now.
4843             __ MaybeUnpoisonHeapReference(temp);
4844 
4845             // /* HeapReference<Class> */ temp = temp->super_class_
4846             __ movl(temp, Address(temp, super_offset));
4847             // If heap poisoning is enabled, no need to unpoison
4848             // `temp`, as we are comparing against null below.
4849             __ testl(temp, temp);
4850             __ j(kNotEqual, slow_path->GetEntryLabel());
4851             __ Bind(&do_put);
4852           } else {
4853             __ j(kNotEqual, slow_path->GetEntryLabel());
4854           }
4855         }
4856       }
4857 
4858       if (kPoisonHeapReferences) {
4859         __ movl(temp, register_value);
4860         __ PoisonHeapReference(temp);
4861         __ movl(address, temp);
4862       } else {
4863         __ movl(address, register_value);
4864       }
4865       if (!may_need_runtime_call_for_type_check) {
4866         codegen_->MaybeRecordImplicitNullCheck(instruction);
4867       }
4868 
4869       CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
4870       codegen_->MarkGCCard(
4871           temp, card, array, value.AsRegister<CpuRegister>(), instruction->GetValueCanBeNull());
4872       __ Bind(&done);
4873 
4874       if (slow_path != nullptr) {
4875         __ Bind(slow_path->GetExitLabel());
4876       }
4877 
4878       break;
4879     }
4880 
4881     case Primitive::kPrimInt: {
4882       uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
4883       Address address = index.IsConstant()
4884           ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
4885           : Address(array, index.AsRegister<CpuRegister>(), TIMES_4, offset);
4886       if (value.IsRegister()) {
4887         __ movl(address, value.AsRegister<CpuRegister>());
4888       } else {
4889         DCHECK(value.IsConstant()) << value;
4890         int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
4891         __ movl(address, Immediate(v));
4892       }
4893       codegen_->MaybeRecordImplicitNullCheck(instruction);
4894       break;
4895     }
4896 
4897     case Primitive::kPrimLong: {
4898       uint32_t offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
4899       Address address = index.IsConstant()
4900           ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + offset)
4901           : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset);
4902       if (value.IsRegister()) {
4903         __ movq(address, value.AsRegister<CpuRegister>());
4904         codegen_->MaybeRecordImplicitNullCheck(instruction);
4905       } else {
4906         int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
4907         Address address_high = index.IsConstant()
4908             ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) +
4909                 offset + sizeof(int32_t))
4910             : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset + sizeof(int32_t));
4911         codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
4912       }
4913       break;
4914     }
4915 
4916     case Primitive::kPrimFloat: {
4917       uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
4918       Address address = index.IsConstant()
4919           ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
4920           : Address(array, index.AsRegister<CpuRegister>(), TIMES_4, offset);
4921       if (value.IsFpuRegister()) {
4922         __ movss(address, value.AsFpuRegister<XmmRegister>());
4923       } else {
4924         DCHECK(value.IsConstant());
4925         int32_t v =
4926             bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
4927         __ movl(address, Immediate(v));
4928       }
4929       codegen_->MaybeRecordImplicitNullCheck(instruction);
4930       break;
4931     }
4932 
4933     case Primitive::kPrimDouble: {
4934       uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
4935       Address address = index.IsConstant()
4936           ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + offset)
4937           : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset);
4938       if (value.IsFpuRegister()) {
4939         __ movsd(address, value.AsFpuRegister<XmmRegister>());
4940         codegen_->MaybeRecordImplicitNullCheck(instruction);
4941       } else {
4942         int64_t v =
4943             bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
4944         Address address_high = index.IsConstant()
4945             ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) +
4946                 offset + sizeof(int32_t))
4947             : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset + sizeof(int32_t));
4948         codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
4949       }
4950       break;
4951     }
4952 
4953     case Primitive::kPrimVoid:
4954       LOG(FATAL) << "Unreachable type " << instruction->GetType();
4955       UNREACHABLE();
4956   }
4957 }
4958 
VisitArrayLength(HArrayLength * instruction)4959 void LocationsBuilderX86_64::VisitArrayLength(HArrayLength* instruction) {
4960   LocationSummary* locations =
4961       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
4962   locations->SetInAt(0, Location::RequiresRegister());
4963   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4964 }
4965 
VisitArrayLength(HArrayLength * instruction)4966 void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction) {
4967   LocationSummary* locations = instruction->GetLocations();
4968   uint32_t offset = mirror::Array::LengthOffset().Uint32Value();
4969   CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
4970   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4971   __ movl(out, Address(obj, offset));
4972   codegen_->MaybeRecordImplicitNullCheck(instruction);
4973 }
4974 
VisitBoundsCheck(HBoundsCheck * instruction)4975 void LocationsBuilderX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
4976   LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
4977       ? LocationSummary::kCallOnSlowPath
4978       : LocationSummary::kNoCall;
4979   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
4980   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
4981   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
4982   if (instruction->HasUses()) {
4983     locations->SetOut(Location::SameAsFirstInput());
4984   }
4985 }
4986 
VisitBoundsCheck(HBoundsCheck * instruction)4987 void InstructionCodeGeneratorX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
4988   LocationSummary* locations = instruction->GetLocations();
4989   Location index_loc = locations->InAt(0);
4990   Location length_loc = locations->InAt(1);
4991   SlowPathCode* slow_path =
4992       new (GetGraph()->GetArena()) BoundsCheckSlowPathX86_64(instruction);
4993 
4994   if (length_loc.IsConstant()) {
4995     int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
4996     if (index_loc.IsConstant()) {
4997       // BCE will have removed the bounds check if it was guaranteed to pass.
4998       int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
4999       if (index < 0 || index >= length) {
5000         codegen_->AddSlowPath(slow_path);
5001         __ jmp(slow_path->GetEntryLabel());
5002       } else {
5003         // Some optimization after BCE may have generated this, and we should not
5004         // emit a bounds check when the index is statically known to be in range.
5005       }
5006       return;
5007     }
5008 
5009     // We have to reverse the jump condition because the length is the constant.
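    // Note that the unsigned kAboveEqual comparison also sends a negative index to the
    // slow path, since it compares as a large unsigned value.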
5010     CpuRegister index_reg = index_loc.AsRegister<CpuRegister>();
5011     __ cmpl(index_reg, Immediate(length));
5012     codegen_->AddSlowPath(slow_path);
5013     __ j(kAboveEqual, slow_path->GetEntryLabel());
5014   } else {
5015     CpuRegister length = length_loc.AsRegister<CpuRegister>();
5016     if (index_loc.IsConstant()) {
5017       int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
5018       __ cmpl(length, Immediate(value));
5019     } else {
5020       __ cmpl(length, index_loc.AsRegister<CpuRegister>());
5021     }
5022     codegen_->AddSlowPath(slow_path);
5023     __ j(kBelowEqual, slow_path->GetEntryLabel());
5024   }
5025 }
5026 
MarkGCCard(CpuRegister temp,CpuRegister card,CpuRegister object,CpuRegister value,bool value_can_be_null)5027 void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp,
5028                                      CpuRegister card,
5029                                      CpuRegister object,
5030                                      CpuRegister value,
5031                                      bool value_can_be_null) {
5032   NearLabel is_null;
5033   if (value_can_be_null) {
5034     __ testl(value, value);
5035     __ j(kEqual, &is_null);
5036   }
5037   __ gs()->movq(card, Address::Absolute(Thread::CardTableOffset<kX86_64WordSize>().Int32Value(),
5038                                         /* no_rip */ true));
5039   __ movq(temp, object);
5040   __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift));
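  // The byte stored below is the low byte of `card`, i.e. of the card table base; the
  // table is created with a bias so that this byte equals kCardDirty (see
  // gc::accounting::CardTable), so a single movb marks the card dirty.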
5041   __ movb(Address(temp, card, TIMES_1, 0), card);
5042   if (value_can_be_null) {
5043     __ Bind(&is_null);
5044   }
5045 }
5046 
VisitParallelMove(HParallelMove * instruction ATTRIBUTE_UNUSED)5047 void LocationsBuilderX86_64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
5048   LOG(FATAL) << "Unimplemented";
5049 }
5050 
VisitParallelMove(HParallelMove * instruction)5051 void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instruction) {
5052   codegen_->GetMoveResolver()->EmitNativeCode(instruction);
5053 }
5054 
VisitSuspendCheck(HSuspendCheck * instruction)5055 void LocationsBuilderX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
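  // Note: the LocationSummary constructor attaches itself to the instruction, so the
  // result does not need to be stored.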
5056   new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
5057 }
5058 
VisitSuspendCheck(HSuspendCheck * instruction)5059 void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
5060   HBasicBlock* block = instruction->GetBlock();
5061   if (block->GetLoopInformation() != nullptr) {
5062     DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
5063     // The back edge will generate the suspend check.
5064     return;
5065   }
5066   if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
5067     // The goto will generate the suspend check.
5068     return;
5069   }
5070   GenerateSuspendCheck(instruction, nullptr);
5071 }
5072 
GenerateSuspendCheck(HSuspendCheck * instruction,HBasicBlock * successor)5073 void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruction,
5074                                                           HBasicBlock* successor) {
5075   SuspendCheckSlowPathX86_64* slow_path =
5076       down_cast<SuspendCheckSlowPathX86_64*>(instruction->GetSlowPath());
5077   if (slow_path == nullptr) {
5078     slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathX86_64(instruction, successor);
5079     instruction->SetSlowPath(slow_path);
5080     codegen_->AddSlowPath(slow_path);
5081     if (successor != nullptr) {
5082       DCHECK(successor->IsLoopHeader());
5083       codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction);
5084     }
5085   } else {
5086     DCHECK_EQ(slow_path->GetSuccessor(), successor);
5087   }
5088 
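  // Test the 16-bit thread flags word: any pending flag (e.g. a suspend or checkpoint
  // request) makes it non-zero and diverts execution to the slow path.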
5089   __ gs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64WordSize>().Int32Value(),
5090                                   /* no_rip */ true),
5091                 Immediate(0));
5092   if (successor == nullptr) {
5093     __ j(kNotEqual, slow_path->GetEntryLabel());
5094     __ Bind(slow_path->GetReturnLabel());
5095   } else {
5096     __ j(kEqual, codegen_->GetLabelOf(successor));
5097     __ jmp(slow_path->GetEntryLabel());
5098   }
5099 }
5100 
GetAssembler() const5101 X86_64Assembler* ParallelMoveResolverX86_64::GetAssembler() const {
5102   return codegen_->GetAssembler();
5103 }
5104 
EmitMove(size_t index)5105 void ParallelMoveResolverX86_64::EmitMove(size_t index) {
5106   MoveOperands* move = moves_[index];
5107   Location source = move->GetSource();
5108   Location destination = move->GetDestination();
5109 
5110   if (source.IsRegister()) {
5111     if (destination.IsRegister()) {
5112       __ movq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
5113     } else if (destination.IsStackSlot()) {
5114       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
5115               source.AsRegister<CpuRegister>());
5116     } else {
5117       DCHECK(destination.IsDoubleStackSlot());
5118       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
5119               source.AsRegister<CpuRegister>());
5120     }
5121   } else if (source.IsStackSlot()) {
5122     if (destination.IsRegister()) {
5123       __ movl(destination.AsRegister<CpuRegister>(),
5124               Address(CpuRegister(RSP), source.GetStackIndex()));
5125     } else if (destination.IsFpuRegister()) {
5126       __ movss(destination.AsFpuRegister<XmmRegister>(),
5127               Address(CpuRegister(RSP), source.GetStackIndex()));
5128     } else {
5129       DCHECK(destination.IsStackSlot());
5130       __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
5131       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
5132     }
5133   } else if (source.IsDoubleStackSlot()) {
5134     if (destination.IsRegister()) {
5135       __ movq(destination.AsRegister<CpuRegister>(),
5136               Address(CpuRegister(RSP), source.GetStackIndex()));
5137     } else if (destination.IsFpuRegister()) {
5138       __ movsd(destination.AsFpuRegister<XmmRegister>(),
5139                Address(CpuRegister(RSP), source.GetStackIndex()));
5140     } else {
5141       DCHECK(destination.IsDoubleStackSlot()) << destination;
5142       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
5143       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
5144     }
5145   } else if (source.IsConstant()) {
5146     HConstant* constant = source.GetConstant();
5147     if (constant->IsIntConstant() || constant->IsNullConstant()) {
5148       int32_t value = CodeGenerator::GetInt32ValueOf(constant);
5149       if (destination.IsRegister()) {
5150         if (value == 0) {
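          // xorl is preferred for zero: it has a shorter encoding than movl with an
          // immediate and, like any 32-bit operation, clears the upper half of the register.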
5151           __ xorl(destination.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
5152         } else {
5153           __ movl(destination.AsRegister<CpuRegister>(), Immediate(value));
5154         }
5155       } else {
5156         DCHECK(destination.IsStackSlot()) << destination;
5157         __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
5158       }
5159     } else if (constant->IsLongConstant()) {
5160       int64_t value = constant->AsLongConstant()->GetValue();
5161       if (destination.IsRegister()) {
5162         codegen_->Load64BitValue(destination.AsRegister<CpuRegister>(), value);
5163       } else {
5164         DCHECK(destination.IsDoubleStackSlot()) << destination;
5165         codegen_->Store64BitValueToStack(destination, value);
5166       }
5167     } else if (constant->IsFloatConstant()) {
5168       float fp_value = constant->AsFloatConstant()->GetValue();
5169       if (destination.IsFpuRegister()) {
5170         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
5171         codegen_->Load32BitValue(dest, fp_value);
5172       } else {
5173         DCHECK(destination.IsStackSlot()) << destination;
5174         Immediate imm(bit_cast<int32_t, float>(fp_value));
5175         __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
5176       }
5177     } else {
5178       DCHECK(constant->IsDoubleConstant()) << constant->DebugName();
5179       double fp_value = constant->AsDoubleConstant()->GetValue();
5180       int64_t value = bit_cast<int64_t, double>(fp_value);
5181       if (destination.IsFpuRegister()) {
5182         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
5183         codegen_->Load64BitValue(dest, fp_value);
5184       } else {
5185         DCHECK(destination.IsDoubleStackSlot()) << destination;
5186         codegen_->Store64BitValueToStack(destination, value);
5187       }
5188     }
5189   } else if (source.IsFpuRegister()) {
5190     if (destination.IsFpuRegister()) {
5191       __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
5192     } else if (destination.IsStackSlot()) {
5193       __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
5194                source.AsFpuRegister<XmmRegister>());
5195     } else {
5196       DCHECK(destination.IsDoubleStackSlot()) << destination;
5197       __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
5198                source.AsFpuRegister<XmmRegister>());
5199     }
5200   }
5201 }
5202 
Exchange32(CpuRegister reg,int mem)5203 void ParallelMoveResolverX86_64::Exchange32(CpuRegister reg, int mem) {
5204   __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5205   __ movl(Address(CpuRegister(RSP), mem), reg);
5206   __ movl(reg, CpuRegister(TMP));
5207 }
5208 
Exchange32(int mem1,int mem2)5209 void ParallelMoveResolverX86_64::Exchange32(int mem1, int mem2) {
5210   ScratchRegisterScope ensure_scratch(
5211       this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
5212 
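  // A memory-to-memory exchange needs a second scratch register besides TMP. If that
  // register had to be spilled, the push moved RSP down by one word, so the RSP-relative
  // offsets below are rebased by stack_offset.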
5213   int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
5214   __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
5215   __ movl(CpuRegister(ensure_scratch.GetRegister()),
5216           Address(CpuRegister(RSP), mem2 + stack_offset));
5217   __ movl(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
5218   __ movl(Address(CpuRegister(RSP), mem1 + stack_offset),
5219           CpuRegister(ensure_scratch.GetRegister()));
5220 }
5221 
Exchange64(CpuRegister reg1,CpuRegister reg2)5222 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg1, CpuRegister reg2) {
5223   __ movq(CpuRegister(TMP), reg1);
5224   __ movq(reg1, reg2);
5225   __ movq(reg2, CpuRegister(TMP));
5226 }
5227 
Exchange64(CpuRegister reg,int mem)5228 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) {
5229   __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5230   __ movq(Address(CpuRegister(RSP), mem), reg);
5231   __ movq(reg, CpuRegister(TMP));
5232 }
5233 
Exchange64(int mem1,int mem2)5234 void ParallelMoveResolverX86_64::Exchange64(int mem1, int mem2) {
5235   ScratchRegisterScope ensure_scratch(
5236       this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
5237 
5238   int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
5239   __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
5240   __ movq(CpuRegister(ensure_scratch.GetRegister()),
5241           Address(CpuRegister(RSP), mem2 + stack_offset));
5242   __ movq(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
5243   __ movq(Address(CpuRegister(RSP), mem1 + stack_offset),
5244           CpuRegister(ensure_scratch.GetRegister()));
5245 }
5246 
Exchange32(XmmRegister reg,int mem)5247 void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) {
5248   __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5249   __ movss(Address(CpuRegister(RSP), mem), reg);
5250   __ movd(reg, CpuRegister(TMP));
5251 }
5252 
Exchange64(XmmRegister reg,int mem)5253 void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) {
5254   __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5255   __ movsd(Address(CpuRegister(RSP), mem), reg);
5256   __ movd(reg, CpuRegister(TMP));
5257 }
5258 
EmitSwap(size_t index)5259 void ParallelMoveResolverX86_64::EmitSwap(size_t index) {
5260   MoveOperands* move = moves_[index];
5261   Location source = move->GetSource();
5262   Location destination = move->GetDestination();
5263 
5264   if (source.IsRegister() && destination.IsRegister()) {
5265     Exchange64(source.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
5266   } else if (source.IsRegister() && destination.IsStackSlot()) {
5267     Exchange32(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
5268   } else if (source.IsStackSlot() && destination.IsRegister()) {
5269     Exchange32(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
5270   } else if (source.IsStackSlot() && destination.IsStackSlot()) {
5271     Exchange32(destination.GetStackIndex(), source.GetStackIndex());
5272   } else if (source.IsRegister() && destination.IsDoubleStackSlot()) {
5273     Exchange64(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
5274   } else if (source.IsDoubleStackSlot() && destination.IsRegister()) {
5275     Exchange64(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
5276   } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
5277     Exchange64(destination.GetStackIndex(), source.GetStackIndex());
5278   } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
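    // Swap two XMM registers by routing one value through the integer TMP register; only
    // the low 64 bits (holding the FP value) need to be preserved.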
5279     __ movd(CpuRegister(TMP), source.AsFpuRegister<XmmRegister>());
5280     __ movaps(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>());
5281     __ movd(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP));
5282   } else if (source.IsFpuRegister() && destination.IsStackSlot()) {
5283     Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
5284   } else if (source.IsStackSlot() && destination.IsFpuRegister()) {
5285     Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
5286   } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) {
5287     Exchange64(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
5288   } else if (source.IsDoubleStackSlot() && destination.IsFpuRegister()) {
5289     Exchange64(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
5290   } else {
5291     LOG(FATAL) << "Unimplemented swap between " << source << " and " << destination;
5292   }
5293 }
5294 
5295 
SpillScratch(int reg)5296 void ParallelMoveResolverX86_64::SpillScratch(int reg) {
5297   __ pushq(CpuRegister(reg));
5298 }
5299 
5300 
RestoreScratch(int reg)5301 void ParallelMoveResolverX86_64::RestoreScratch(int reg) {
5302   __ popq(CpuRegister(reg));
5303 }
5304 
GenerateClassInitializationCheck(SlowPathCode * slow_path,CpuRegister class_reg)5305 void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck(
5306     SlowPathCode* slow_path, CpuRegister class_reg) {
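  // Class statuses are ordered: anything below kStatusInitialized (including the negative
  // error states) may still require initialization, so take the slow path for it.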
5307   __ cmpl(Address(class_reg,  mirror::Class::StatusOffset().Int32Value()),
5308           Immediate(mirror::Class::kStatusInitialized));
5309   __ j(kLess, slow_path->GetEntryLabel());
5310   __ Bind(slow_path->GetExitLabel());
5311   // No need for memory fence, thanks to the x86-64 memory model.
5312 }
5313 
VisitLoadClass(HLoadClass * cls)5314 void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
5315   InvokeRuntimeCallingConvention calling_convention;
5316   CodeGenerator::CreateLoadClassLocationSummary(
5317       cls,
5318       Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
5319       Location::RegisterLocation(RAX),
5320       /* code_generator_supports_read_barrier */ true);
5321 }
5322 
VisitLoadClass(HLoadClass * cls)5323 void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) {
5324   LocationSummary* locations = cls->GetLocations();
5325   if (cls->NeedsAccessCheck()) {
5326     codegen_->MoveConstant(locations->GetTemp(0), cls->GetTypeIndex());
5327     codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInitializeTypeAndVerifyAccess),
5328                             cls,
5329                             cls->GetDexPc(),
5330                             nullptr);
5331     CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
5332     return;
5333   }
5334 
5335   Location out_loc = locations->Out();
5336   CpuRegister out = out_loc.AsRegister<CpuRegister>();
5337   CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
5338 
5339   if (cls->IsReferrersClass()) {
5340     DCHECK(!cls->CanCallRuntime());
5341     DCHECK(!cls->MustGenerateClinitCheck());
5342     // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
5343     GenerateGcRootFieldLoad(
5344         cls, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
5345   } else {
5346     // /* GcRoot<mirror::Class>[] */ out =
5347     //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
5348     __ movq(out, Address(current_method,
5349                          ArtMethod::DexCacheResolvedTypesOffset(kX86_64PointerSize).Int32Value()));
5350     // /* GcRoot<mirror::Class> */ out = out[type_index]
5351     GenerateGcRootFieldLoad(
5352         cls, out_loc, Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())));
5353 
5354     if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
5355       DCHECK(cls->CanCallRuntime());
5356       SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64(
5357           cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
5358       codegen_->AddSlowPath(slow_path);
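      // If the class is not guaranteed to be in the dex cache, a null GC root means it has
      // not been resolved yet, so the slow path must resolve it first.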
5359       if (!cls->IsInDexCache()) {
5360         __ testl(out, out);
5361         __ j(kEqual, slow_path->GetEntryLabel());
5362       }
5363       if (cls->MustGenerateClinitCheck()) {
5364         GenerateClassInitializationCheck(slow_path, out);
5365       } else {
5366         __ Bind(slow_path->GetExitLabel());
5367       }
5368     }
5369   }
5370 }
5371 
VisitClinitCheck(HClinitCheck * check)5372 void LocationsBuilderX86_64::VisitClinitCheck(HClinitCheck* check) {
5373   LocationSummary* locations =
5374       new (GetGraph()->GetArena()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
5375   locations->SetInAt(0, Location::RequiresRegister());
5376   if (check->HasUses()) {
5377     locations->SetOut(Location::SameAsFirstInput());
5378   }
5379 }
5380 
VisitClinitCheck(HClinitCheck * check)5381 void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) {
5382   // We assume the class is not null.
5383   SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64(
5384       check->GetLoadClass(), check, check->GetDexPc(), true);
5385   codegen_->AddSlowPath(slow_path);
5386   GenerateClassInitializationCheck(slow_path,
5387                                    check->GetLocations()->InAt(0).AsRegister<CpuRegister>());
5388 }
5389 
GetSupportedLoadStringKind(HLoadString::LoadKind desired_string_load_kind)5390 HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind(
5391     HLoadString::LoadKind desired_string_load_kind) {
5392   if (kEmitCompilerReadBarrier) {
5393     switch (desired_string_load_kind) {
5394       case HLoadString::LoadKind::kBootImageLinkTimeAddress:
5395       case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
5396       case HLoadString::LoadKind::kBootImageAddress:
5397         // TODO: Implement for read barrier.
5398         return HLoadString::LoadKind::kDexCacheViaMethod;
5399       default:
5400         break;
5401     }
5402   }
5403   switch (desired_string_load_kind) {
5404     case HLoadString::LoadKind::kBootImageLinkTimeAddress:
5405       DCHECK(!GetCompilerOptions().GetCompilePic());
5406       // We prefer the always-available RIP-relative address for the x86-64 boot image.
5407       return HLoadString::LoadKind::kBootImageLinkTimePcRelative;
5408     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
5409       DCHECK(GetCompilerOptions().GetCompilePic());
5410       break;
5411     case HLoadString::LoadKind::kBootImageAddress:
5412       break;
5413     case HLoadString::LoadKind::kDexCacheAddress:
5414       DCHECK(Runtime::Current()->UseJitCompilation());
5415       break;
5416     case HLoadString::LoadKind::kDexCachePcRelative:
5417       DCHECK(!Runtime::Current()->UseJitCompilation());
5418       break;
5419     case HLoadString::LoadKind::kDexCacheViaMethod:
5420       break;
5421   }
5422   return desired_string_load_kind;
5423 }
5424 
VisitLoadString(HLoadString * load)5425 void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) {
5426   LocationSummary::CallKind call_kind = (load->NeedsEnvironment() || kEmitCompilerReadBarrier)
5427       ? LocationSummary::kCallOnSlowPath
5428       : LocationSummary::kNoCall;
5429   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
5430   if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) {
5431     locations->SetInAt(0, Location::RequiresRegister());
5432   }
5433   locations->SetOut(Location::RequiresRegister());
5434 }
5435 
VisitLoadString(HLoadString * load)5436 void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) {
5437   LocationSummary* locations = load->GetLocations();
5438   Location out_loc = locations->Out();
5439   CpuRegister out = out_loc.AsRegister<CpuRegister>();
5440 
5441   switch (load->GetLoadKind()) {
5442     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
5443       DCHECK(!kEmitCompilerReadBarrier);
5444       __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false));
5445       codegen_->RecordStringPatch(load);
5446       return;  // No dex cache slow path.
5447     }
5448     case HLoadString::LoadKind::kBootImageAddress: {
5449       DCHECK(!kEmitCompilerReadBarrier);
5450       DCHECK_NE(load->GetAddress(), 0u);
5451       uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress());
5452       __ movl(out, Immediate(address));  // Zero-extended.
5453       codegen_->RecordSimplePatch();
5454       return;  // No dex cache slow path.
5455     }
5456     case HLoadString::LoadKind::kDexCacheAddress: {
5457       DCHECK_NE(load->GetAddress(), 0u);
5458       if (IsUint<32>(load->GetAddress())) {
5459         Address address = Address::Absolute(load->GetAddress(), /* no_rip */ true);
5460         GenerateGcRootFieldLoad(load, out_loc, address);
5461       } else {
5462         // TODO: Consider using opcode A1, i.e. movl eax, moff32 (with 64-bit address).
5463         __ movq(out, Immediate(load->GetAddress()));
5464         GenerateGcRootFieldLoad(load, out_loc, Address(out, 0));
5465       }
5466       break;
5467     }
5468     case HLoadString::LoadKind::kDexCachePcRelative: {
5469       uint32_t offset = load->GetDexCacheElementOffset();
5470       Label* fixup_label = codegen_->NewPcRelativeDexCacheArrayPatch(load->GetDexFile(), offset);
5471       Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
5472                                           /* no_rip */ false);
5473       GenerateGcRootFieldLoad(load, out_loc, address, fixup_label);
5474       break;
5475     }
5476     case HLoadString::LoadKind::kDexCacheViaMethod: {
5477       CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
5478 
5479       // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
5480       GenerateGcRootFieldLoad(
5481           load, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
5482       // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
5483       __ movq(out, Address(out, mirror::Class::DexCacheStringsOffset().Uint32Value()));
5484       // /* GcRoot<mirror::String> */ out = out[string_index]
5485       GenerateGcRootFieldLoad(
5486           load, out_loc, Address(out, CodeGenerator::GetCacheOffset(load->GetStringIndex())));
5487       break;
5488     }
5489     default:
5490       LOG(FATAL) << "Unexpected load kind: " << load->GetLoadKind();
5491       UNREACHABLE();
5492   }
5493 
5494   if (!load->IsInDexCache()) {
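    // A null result means the string has not been resolved and stored in the dex cache
    // yet; the slow path takes care of that.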
5495     SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86_64(load);
5496     codegen_->AddSlowPath(slow_path);
5497     __ testl(out, out);
5498     __ j(kEqual, slow_path->GetEntryLabel());
5499     __ Bind(slow_path->GetExitLabel());
5500   }
5501 }
5502 
GetExceptionTlsAddress()5503 static Address GetExceptionTlsAddress() {
5504   return Address::Absolute(Thread::ExceptionOffset<kX86_64WordSize>().Int32Value(),
5505                            /* no_rip */ true);
5506 }
5507 
VisitLoadException(HLoadException * load)5508 void LocationsBuilderX86_64::VisitLoadException(HLoadException* load) {
5509   LocationSummary* locations =
5510       new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kNoCall);
5511   locations->SetOut(Location::RequiresRegister());
5512 }
5513 
VisitLoadException(HLoadException * load)5514 void InstructionCodeGeneratorX86_64::VisitLoadException(HLoadException* load) {
5515   __ gs()->movl(load->GetLocations()->Out().AsRegister<CpuRegister>(), GetExceptionTlsAddress());
5516 }
5517 
VisitClearException(HClearException * clear)5518 void LocationsBuilderX86_64::VisitClearException(HClearException* clear) {
5519   new (GetGraph()->GetArena()) LocationSummary(clear, LocationSummary::kNoCall);
5520 }
5521 
VisitClearException(HClearException * clear ATTRIBUTE_UNUSED)5522 void InstructionCodeGeneratorX86_64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
5523   __ gs()->movl(GetExceptionTlsAddress(), Immediate(0));
5524 }
5525 
VisitThrow(HThrow * instruction)5526 void LocationsBuilderX86_64::VisitThrow(HThrow* instruction) {
5527   LocationSummary* locations =
5528       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
5529   InvokeRuntimeCallingConvention calling_convention;
5530   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
5531 }
5532 
VisitThrow(HThrow * instruction)5533 void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) {
5534   codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pDeliverException),
5535                           instruction,
5536                           instruction->GetDexPc(),
5537                           nullptr);
5538   CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
5539 }
5540 
TypeCheckNeedsATemporary(TypeCheckKind type_check_kind)5541 static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
5542   return kEmitCompilerReadBarrier &&
5543       (kUseBakerReadBarrier ||
5544        type_check_kind == TypeCheckKind::kAbstractClassCheck ||
5545        type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
5546        type_check_kind == TypeCheckKind::kArrayObjectCheck);
5547 }
5548 
VisitInstanceOf(HInstanceOf * instruction)5549 void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
5550   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
5551   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
5552   switch (type_check_kind) {
5553     case TypeCheckKind::kExactCheck:
5554     case TypeCheckKind::kAbstractClassCheck:
5555     case TypeCheckKind::kClassHierarchyCheck:
5556     case TypeCheckKind::kArrayObjectCheck:
5557       call_kind =
5558           kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
5559       break;
5560     case TypeCheckKind::kArrayCheck:
5561     case TypeCheckKind::kUnresolvedCheck:
5562     case TypeCheckKind::kInterfaceCheck:
5563       call_kind = LocationSummary::kCallOnSlowPath;
5564       break;
5565   }
5566 
5567   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
5568   locations->SetInAt(0, Location::RequiresRegister());
5569   locations->SetInAt(1, Location::Any());
5570   // Note that TypeCheckSlowPathX86_64 uses this "out" register too.
5571   locations->SetOut(Location::RequiresRegister());
5572   // When read barriers are enabled, we need a temporary register for
5573   // some cases.
5574   if (TypeCheckNeedsATemporary(type_check_kind)) {
5575     locations->AddTemp(Location::RequiresRegister());
5576   }
5577 }
5578 
VisitInstanceOf(HInstanceOf * instruction)5579 void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
5580   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
5581   LocationSummary* locations = instruction->GetLocations();
5582   Location obj_loc = locations->InAt(0);
5583   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
5584   Location cls = locations->InAt(1);
5585   Location out_loc = locations->Out();
5586   CpuRegister out = out_loc.AsRegister<CpuRegister>();
5587   Location maybe_temp_loc = TypeCheckNeedsATemporary(type_check_kind) ?
5588       locations->GetTemp(0) :
5589       Location::NoLocation();
5590   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
5591   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
5592   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
5593   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
5594   SlowPathCode* slow_path = nullptr;
5595   NearLabel done, zero;
5596 
5597   // Return 0 if `obj` is null.
5598   // Avoid null check if we know obj is not null.
5599   if (instruction->MustDoNullCheck()) {
5600     __ testl(obj, obj);
5601     __ j(kEqual, &zero);
5602   }
5603 
5604   // /* HeapReference<Class> */ out = obj->klass_
5605   GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc);
5606 
5607   switch (type_check_kind) {
5608     case TypeCheckKind::kExactCheck: {
5609       if (cls.IsRegister()) {
5610         __ cmpl(out, cls.AsRegister<CpuRegister>());
5611       } else {
5612         DCHECK(cls.IsStackSlot()) << cls;
5613         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
5614       }
5615       if (zero.IsLinked()) {
5616         // Classes must be equal for the instanceof to succeed.
5617         __ j(kNotEqual, &zero);
5618         __ movl(out, Immediate(1));
5619         __ jmp(&done);
5620       } else {
5621         __ setcc(kEqual, out);
5622         // setcc only sets the low byte.
5623         __ andl(out, Immediate(1));
5624       }
5625       break;
5626     }
5627 
5628     case TypeCheckKind::kAbstractClassCheck: {
5629       // If the class is abstract, we eagerly fetch the super class of the
5630       // object to avoid doing a comparison we know will fail.
5631       NearLabel loop, success;
5632       __ Bind(&loop);
5633       // /* HeapReference<Class> */ out = out->super_class_
5634       GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
5635       __ testl(out, out);
5636       // If `out` is null, we use it for the result, and jump to `done`.
5637       __ j(kEqual, &done);
5638       if (cls.IsRegister()) {
5639         __ cmpl(out, cls.AsRegister<CpuRegister>());
5640       } else {
5641         DCHECK(cls.IsStackSlot()) << cls;
5642         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
5643       }
5644       __ j(kNotEqual, &loop);
5645       __ movl(out, Immediate(1));
5646       if (zero.IsLinked()) {
5647         __ jmp(&done);
5648       }
5649       break;
5650     }
5651 
5652     case TypeCheckKind::kClassHierarchyCheck: {
5653       // Walk over the class hierarchy to find a match.
5654       NearLabel loop, success;
5655       __ Bind(&loop);
5656       if (cls.IsRegister()) {
5657         __ cmpl(out, cls.AsRegister<CpuRegister>());
5658       } else {
5659         DCHECK(cls.IsStackSlot()) << cls;
5660         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
5661       }
5662       __ j(kEqual, &success);
5663       // /* HeapReference<Class> */ out = out->super_class_
5664       GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
5665       __ testl(out, out);
5666       __ j(kNotEqual, &loop);
5667       // If `out` is null, we use it for the result, and jump to `done`.
5668       __ jmp(&done);
5669       __ Bind(&success);
5670       __ movl(out, Immediate(1));
5671       if (zero.IsLinked()) {
5672         __ jmp(&done);
5673       }
5674       break;
5675     }
5676 
5677     case TypeCheckKind::kArrayObjectCheck: {
5678       // Do an exact check.
5679       NearLabel exact_check;
5680       if (cls.IsRegister()) {
5681         __ cmpl(out, cls.AsRegister<CpuRegister>());
5682       } else {
5683         DCHECK(cls.IsStackSlot()) << cls;
5684         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
5685       }
5686       __ j(kEqual, &exact_check);
5687       // Otherwise, we need to check that the object's class is a non-primitive array.
5688       // /* HeapReference<Class> */ out = out->component_type_
5689       GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, maybe_temp_loc);
5690       __ testl(out, out);
5691       // If `out` is null, we use it for the result, and jump to `done`.
5692       __ j(kEqual, &done);
5693       __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot));
5694       __ j(kNotEqual, &zero);
5695       __ Bind(&exact_check);
5696       __ movl(out, Immediate(1));
5697       __ jmp(&done);
5698       break;
5699     }
5700 
5701     case TypeCheckKind::kArrayCheck: {
5702       if (cls.IsRegister()) {
5703         __ cmpl(out, cls.AsRegister<CpuRegister>());
5704       } else {
5705         DCHECK(cls.IsStackSlot()) << cls;
5706         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
5707       }
5708       DCHECK(locations->OnlyCallsOnSlowPath());
5709       slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction,
5710                                                                        /* is_fatal */ false);
5711       codegen_->AddSlowPath(slow_path);
5712       __ j(kNotEqual, slow_path->GetEntryLabel());
5713       __ movl(out, Immediate(1));
5714       if (zero.IsLinked()) {
5715         __ jmp(&done);
5716       }
5717       break;
5718     }
5719 
5720     case TypeCheckKind::kUnresolvedCheck:
5721     case TypeCheckKind::kInterfaceCheck: {
5722       // Note that we indeed only call on slow path, but we always go
5723       // into the slow path for the unresolved and interface check
5724       // cases.
5725       //
5726       // We cannot directly call the InstanceofNonTrivial runtime
5727       // entry point without resorting to a type checking slow path
5728       // here (i.e. by calling InvokeRuntime directly), as it would
5729       // require to assign fixed registers for the inputs of this
5730       // HInstanceOf instruction (following the runtime calling
5731       // convention), which might be cluttered by the potential first
5732       // read barrier emission at the beginning of this method.
5733       //
5734       // TODO: Introduce a new runtime entry point taking the object
5735       // to test (instead of its class) as argument, and let it deal
5736       // with the read barrier issues. This will let us refactor this
5737       // case of the `switch` code as it was previously (with a direct
5738       // call to the runtime not using a type checking slow path).
5739       // This should also be beneficial for the other cases above.
5740       DCHECK(locations->OnlyCallsOnSlowPath());
5741       slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction,
5742                                                                        /* is_fatal */ false);
5743       codegen_->AddSlowPath(slow_path);
5744       __ jmp(slow_path->GetEntryLabel());
5745       if (zero.IsLinked()) {
5746         __ jmp(&done);
5747       }
5748       break;
5749     }
5750   }
5751 
5752   if (zero.IsLinked()) {
5753     __ Bind(&zero);
5754     __ xorl(out, out);
5755   }
5756 
5757   if (done.IsLinked()) {
5758     __ Bind(&done);
5759   }
5760 
5761   if (slow_path != nullptr) {
5762     __ Bind(slow_path->GetExitLabel());
5763   }
5764 }
5765 
VisitCheckCast(HCheckCast * instruction)5766 void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) {
5767   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
5768   bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
5769   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
5770   switch (type_check_kind) {
5771     case TypeCheckKind::kExactCheck:
5772     case TypeCheckKind::kAbstractClassCheck:
5773     case TypeCheckKind::kClassHierarchyCheck:
5774     case TypeCheckKind::kArrayObjectCheck:
5775       call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ?
5776           LocationSummary::kCallOnSlowPath :
5777           LocationSummary::kNoCall;  // In fact, call on a fatal (non-returning) slow path.
5778       break;
5779     case TypeCheckKind::kArrayCheck:
5780     case TypeCheckKind::kUnresolvedCheck:
5781     case TypeCheckKind::kInterfaceCheck:
5782       call_kind = LocationSummary::kCallOnSlowPath;
5783       break;
5784   }
5785   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
5786   locations->SetInAt(0, Location::RequiresRegister());
5787   locations->SetInAt(1, Location::Any());
5788   // Note that TypeCheckSlowPathX86_64 uses this "temp" register too.
5789   locations->AddTemp(Location::RequiresRegister());
5790   // When read barriers are enabled, we need an additional temporary
5791   // register for some cases.
5792   if (TypeCheckNeedsATemporary(type_check_kind)) {
5793     locations->AddTemp(Location::RequiresRegister());
5794   }
5795 }
5796 
VisitCheckCast(HCheckCast * instruction)5797 void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
5798   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
5799   LocationSummary* locations = instruction->GetLocations();
5800   Location obj_loc = locations->InAt(0);
5801   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
5802   Location cls = locations->InAt(1);
5803   Location temp_loc = locations->GetTemp(0);
5804   CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
5805   Location maybe_temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ?
5806       locations->GetTemp(1) :
5807       Location::NoLocation();
5808   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
5809   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
5810   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
5811   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
5812 
5813   bool is_type_check_slow_path_fatal =
5814       (type_check_kind == TypeCheckKind::kExactCheck ||
5815        type_check_kind == TypeCheckKind::kAbstractClassCheck ||
5816        type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
5817        type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
5818       !instruction->CanThrowIntoCatchBlock();
5819   SlowPathCode* type_check_slow_path =
5820       new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction,
5821                                                            is_type_check_slow_path_fatal);
5822   codegen_->AddSlowPath(type_check_slow_path);
5823 
5824   switch (type_check_kind) {
5825     case TypeCheckKind::kExactCheck:
5826     case TypeCheckKind::kArrayCheck: {
5827       NearLabel done;
5828       // Avoid null check if we know obj is not null.
5829       if (instruction->MustDoNullCheck()) {
5830         __ testl(obj, obj);
5831         __ j(kEqual, &done);
5832       }
5833 
5834       // /* HeapReference<Class> */ temp = obj->klass_
5835       GenerateReferenceLoadTwoRegisters(
5836           instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
5837 
5838       if (cls.IsRegister()) {
5839         __ cmpl(temp, cls.AsRegister<CpuRegister>());
5840       } else {
5841         DCHECK(cls.IsStackSlot()) << cls;
5842         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
5843       }
5844       // Jump to slow path for throwing the exception or doing a
5845       // more involved array check.
5846       __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
5847       __ Bind(&done);
5848       break;
5849     }
5850 
5851     case TypeCheckKind::kAbstractClassCheck: {
5852       NearLabel done;
5853       // Avoid null check if we know obj is not null.
5854       if (instruction->MustDoNullCheck()) {
5855         __ testl(obj, obj);
5856         __ j(kEqual, &done);
5857       }
5858 
5859       // /* HeapReference<Class> */ temp = obj->klass_
5860       GenerateReferenceLoadTwoRegisters(
5861           instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
5862 
5863       // If the class is abstract, we eagerly fetch the super class of the
5864       // object to avoid doing a comparison we know will fail.
5865       NearLabel loop, compare_classes;
5866       __ Bind(&loop);
5867       // /* HeapReference<Class> */ temp = temp->super_class_
5868       GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
5869 
5870       // If the class reference currently in `temp` is not null, jump
5871       // to the `compare_classes` label to compare it with the checked
5872       // class.
5873       __ testl(temp, temp);
5874       __ j(kNotEqual, &compare_classes);
5875       // Otherwise, jump to the slow path to throw the exception.
5876       //
5877       // But before, move back the object's class into `temp` before
5878       // going into the slow path, as it has been overwritten in the
5879       // meantime.
5880       // /* HeapReference<Class> */ temp = obj->klass_
5881       GenerateReferenceLoadTwoRegisters(
5882           instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
5883       __ jmp(type_check_slow_path->GetEntryLabel());
5884 
5885       __ Bind(&compare_classes);
5886       if (cls.IsRegister()) {
5887         __ cmpl(temp, cls.AsRegister<CpuRegister>());
5888       } else {
5889         DCHECK(cls.IsStackSlot()) << cls;
5890         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
5891       }
5892       __ j(kNotEqual, &loop);
5893       __ Bind(&done);
5894       break;
5895     }
5896 
5897     case TypeCheckKind::kClassHierarchyCheck: {
5898       NearLabel done;
5899       // Avoid null check if we know obj is not null.
5900       if (instruction->MustDoNullCheck()) {
5901         __ testl(obj, obj);
5902         __ j(kEqual, &done);
5903       }
5904 
5905       // /* HeapReference<Class> */ temp = obj->klass_
5906       GenerateReferenceLoadTwoRegisters(
5907           instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
5908 
5909       // Walk over the class hierarchy to find a match.
5910       NearLabel loop;
5911       __ Bind(&loop);
5912       if (cls.IsRegister()) {
5913         __ cmpl(temp, cls.AsRegister<CpuRegister>());
5914       } else {
5915         DCHECK(cls.IsStackSlot()) << cls;
5916         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
5917       }
5918       __ j(kEqual, &done);
5919 
5920       // /* HeapReference<Class> */ temp = temp->super_class_
5921       GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
5922 
5923       // If the class reference currently in `temp` is not null, jump
5924       // back at the beginning of the loop.
5925       __ testl(temp, temp);
5926       __ j(kNotEqual, &loop);
5927       // Otherwise, jump to the slow path to throw the exception.
5928       //
5929       // But before, move back the object's class into `temp` before
5930       // going into the slow path, as it has been overwritten in the
5931       // meantime.
5932       // /* HeapReference<Class> */ temp = obj->klass_
5933       GenerateReferenceLoadTwoRegisters(
5934           instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
5935       __ jmp(type_check_slow_path->GetEntryLabel());
5936       __ Bind(&done);
5937       break;
5938     }
5939 
5940     case TypeCheckKind::kArrayObjectCheck: {
5941       // We cannot use a NearLabel here, as its range might be too
5942       // short in some cases when read barriers are enabled.  This has
5943       // been observed for instance when the code emitted for this
5944       // case uses high x86-64 registers (R8-R15).
5945       Label done;
5946       // Avoid null check if we know obj is not null.
5947       if (instruction->MustDoNullCheck()) {
5948         __ testl(obj, obj);
5949         __ j(kEqual, &done);
5950       }
5951 
5952       // /* HeapReference<Class> */ temp = obj->klass_
5953       GenerateReferenceLoadTwoRegisters(
5954           instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
5955 
5956       // Do an exact check.
5957       NearLabel check_non_primitive_component_type;
5958       if (cls.IsRegister()) {
5959         __ cmpl(temp, cls.AsRegister<CpuRegister>());
5960       } else {
5961         DCHECK(cls.IsStackSlot()) << cls;
5962         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
5963       }
5964       __ j(kEqual, &done);
5965 
5966       // Otherwise, we need to check that the object's class is a non-primitive array.
5967       // /* HeapReference<Class> */ temp = temp->component_type_
5968       GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, maybe_temp2_loc);
5969 
5970       // If the component type is not null (i.e. the object is indeed
5971       // an array), jump to label `check_non_primitive_component_type`
5972       // to further check that this component type is not a primitive
5973       // type.
5974       __ testl(temp, temp);
5975       __ j(kNotEqual, &check_non_primitive_component_type);
5976       // Otherwise, jump to the slow path to throw the exception.
5977       //
5978       // But before, move back the object's class into `temp` before
5979       // going into the slow path, as it has been overwritten in the
5980       // meantime.
5981       // /* HeapReference<Class> */ temp = obj->klass_
5982       GenerateReferenceLoadTwoRegisters(
5983           instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
5984       __ jmp(type_check_slow_path->GetEntryLabel());
5985 
5986       __ Bind(&check_non_primitive_component_type);
5987       __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
5988       __ j(kEqual, &done);
5989       // Same comment as above regarding `temp` and the slow path.
5990       // /* HeapReference<Class> */ temp = obj->klass_
5991       GenerateReferenceLoadTwoRegisters(
5992           instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
5993       __ jmp(type_check_slow_path->GetEntryLabel());
5994       __ Bind(&done);
5995       break;
5996     }
5997 
5998     case TypeCheckKind::kUnresolvedCheck:
5999     case TypeCheckKind::kInterfaceCheck:
6000       NearLabel done;
6001       // Avoid null check if we know obj is not null.
6002       if (instruction->MustDoNullCheck()) {
6003         __ testl(obj, obj);
6004         __ j(kEqual, &done);
6005       }
6006 
6007       // /* HeapReference<Class> */ temp = obj->klass_
6008       GenerateReferenceLoadTwoRegisters(
6009           instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
6010 
6011       // We always go into the type check slow path for the unresolved
6012       // and interface check cases.
6013       //
6014       // We cannot directly call the CheckCast runtime entry point
6015       // without resorting to a type checking slow path here (i.e. by
6016       // calling InvokeRuntime directly), as it would require to
6017       // assign fixed registers for the inputs of this HInstanceOf
6018       // instruction (following the runtime calling convention), which
6019       // might be cluttered by the potential first read barrier
6020       // emission at the beginning of this method.
6021       //
6022       // TODO: Introduce a new runtime entry point taking the object
6023       // to test (instead of its class) as argument, and let it deal
6024       // with the read barrier issues. This will let us refactor this
6025       // case of the `switch` code as it was previously (with a direct
6026       // call to the runtime not using a type checking slow path).
6027       // This should also be beneficial for the other cases above.
6028       __ jmp(type_check_slow_path->GetEntryLabel());
6029       __ Bind(&done);
6030       break;
6031   }
6032 
6033   __ Bind(type_check_slow_path->GetExitLabel());
6034 }
6035 
VisitMonitorOperation(HMonitorOperation * instruction)6036 void LocationsBuilderX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
6037   LocationSummary* locations =
6038       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
6039   InvokeRuntimeCallingConvention calling_convention;
6040   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6041 }
6042 
VisitMonitorOperation(HMonitorOperation * instruction)6043 void InstructionCodeGeneratorX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
6044   codegen_->InvokeRuntime(instruction->IsEnter() ? QUICK_ENTRY_POINT(pLockObject)
6045                                                  : QUICK_ENTRY_POINT(pUnlockObject),
6046                           instruction,
6047                           instruction->GetDexPc(),
6048                           nullptr);
6049   if (instruction->IsEnter()) {
6050     CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
6051   } else {
6052     CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
6053   }
6054 }
6055 
VisitAnd(HAnd * instruction)6056 void LocationsBuilderX86_64::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
VisitOr(HOr * instruction)6057 void LocationsBuilderX86_64::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); }
VisitXor(HXor * instruction)6058 void LocationsBuilderX86_64::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); }
6059 
HandleBitwiseOperation(HBinaryOperation * instruction)6060 void LocationsBuilderX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
6061   LocationSummary* locations =
6062       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
6063   DCHECK(instruction->GetResultType() == Primitive::kPrimInt
6064          || instruction->GetResultType() == Primitive::kPrimLong);
6065   locations->SetInAt(0, Location::RequiresRegister());
6066   locations->SetInAt(1, Location::Any());
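  // x86-64 ALU instructions use a two-operand form that overwrites the first source, so
  // the output must share the first input's register.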
6067   locations->SetOut(Location::SameAsFirstInput());
6068 }
6069 
VisitAnd(HAnd * instruction)6070 void InstructionCodeGeneratorX86_64::VisitAnd(HAnd* instruction) {
6071   HandleBitwiseOperation(instruction);
6072 }
6073 
VisitOr(HOr * instruction)6074 void InstructionCodeGeneratorX86_64::VisitOr(HOr* instruction) {
6075   HandleBitwiseOperation(instruction);
6076 }
6077 
VisitXor(HXor * instruction)6078 void InstructionCodeGeneratorX86_64::VisitXor(HXor* instruction) {
6079   HandleBitwiseOperation(instruction);
6080 }
6081 
HandleBitwiseOperation(HBinaryOperation * instruction)6082 void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
6083   LocationSummary* locations = instruction->GetLocations();
6084   Location first = locations->InAt(0);
6085   Location second = locations->InAt(1);
6086   DCHECK(first.Equals(locations->Out()));
6087 
6088   if (instruction->GetResultType() == Primitive::kPrimInt) {
6089     if (second.IsRegister()) {
6090       if (instruction->IsAnd()) {
6091         __ andl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
6092       } else if (instruction->IsOr()) {
6093         __ orl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
6094       } else {
6095         DCHECK(instruction->IsXor());
6096         __ xorl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
6097       }
6098     } else if (second.IsConstant()) {
6099       Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
6100       if (instruction->IsAnd()) {
6101         __ andl(first.AsRegister<CpuRegister>(), imm);
6102       } else if (instruction->IsOr()) {
6103         __ orl(first.AsRegister<CpuRegister>(), imm);
6104       } else {
6105         DCHECK(instruction->IsXor());
6106         __ xorl(first.AsRegister<CpuRegister>(), imm);
6107       }
6108     } else {
6109       Address address(CpuRegister(RSP), second.GetStackIndex());
6110       if (instruction->IsAnd()) {
6111         __ andl(first.AsRegister<CpuRegister>(), address);
6112       } else if (instruction->IsOr()) {
6113         __ orl(first.AsRegister<CpuRegister>(), address);
6114       } else {
6115         DCHECK(instruction->IsXor());
6116         __ xorl(first.AsRegister<CpuRegister>(), address);
6117       }
6118     }
6119   } else {
6120     DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
6121     CpuRegister first_reg = first.AsRegister<CpuRegister>();
6122     bool second_is_constant = false;
6123     int64_t value = 0;
6124     if (second.IsConstant()) {
6125       second_is_constant = true;
6126       value = second.GetConstant()->AsLongConstant()->GetValue();
6127     }
6128     bool is_int32_value = IsInt<32>(value);
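    // and/or/xor only accept sign-extended 32-bit immediates on x86-64; wider constants
    // are instead read from an in-memory literal (LiteralInt64Address).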
6129 
6130     if (instruction->IsAnd()) {
6131       if (second_is_constant) {
6132         if (is_int32_value) {
6133           __ andq(first_reg, Immediate(static_cast<int32_t>(value)));
6134         } else {
6135           __ andq(first_reg, codegen_->LiteralInt64Address(value));
6136         }
6137       } else if (second.IsDoubleStackSlot()) {
6138         __ andq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
6139       } else {
6140         __ andq(first_reg, second.AsRegister<CpuRegister>());
6141       }
6142     } else if (instruction->IsOr()) {
6143       if (second_is_constant) {
6144         if (is_int32_value) {
6145           __ orq(first_reg, Immediate(static_cast<int32_t>(value)));
6146         } else {
6147           __ orq(first_reg, codegen_->LiteralInt64Address(value));
6148         }
6149       } else if (second.IsDoubleStackSlot()) {
6150         __ orq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
6151       } else {
6152         __ orq(first_reg, second.AsRegister<CpuRegister>());
6153       }
6154     } else {
6155       DCHECK(instruction->IsXor());
6156       if (second_is_constant) {
6157         if (is_int32_value) {
6158           __ xorq(first_reg, Immediate(static_cast<int32_t>(value)));
6159         } else {
6160           __ xorq(first_reg, codegen_->LiteralInt64Address(value));
6161         }
6162       } else if (second.IsDoubleStackSlot()) {
6163         __ xorq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
6164       } else {
6165         __ xorq(first_reg, second.AsRegister<CpuRegister>());
6166       }
6167     }
6168   }
6169 }
6170 
GenerateReferenceLoadOneRegister(HInstruction * instruction,Location out,uint32_t offset,Location maybe_temp)6171 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(HInstruction* instruction,
6172                                                                       Location out,
6173                                                                       uint32_t offset,
6174                                                                       Location maybe_temp) {
6175   CpuRegister out_reg = out.AsRegister<CpuRegister>();
6176   if (kEmitCompilerReadBarrier) {
6177     DCHECK(maybe_temp.IsRegister()) << maybe_temp;
6178     if (kUseBakerReadBarrier) {
6179       // Load with fast path based Baker's read barrier.
6180       // /* HeapReference<Object> */ out = *(out + offset)
6181       codegen_->GenerateFieldLoadWithBakerReadBarrier(
6182           instruction, out, out_reg, offset, maybe_temp, /* needs_null_check */ false);
6183     } else {
6184       // Load with slow path based read barrier.
6185       // Save the value of `out` into `maybe_temp` before overwriting it
6186       // in the following move operation, as we will need it for the
6187       // read barrier below.
6188       __ movl(maybe_temp.AsRegister<CpuRegister>(), out_reg);
6189       // /* HeapReference<Object> */ out = *(out + offset)
6190       __ movl(out_reg, Address(out_reg, offset));
6191       codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
6192     }
6193   } else {
6194     // Plain load with no read barrier.
6195     // /* HeapReference<Object> */ out = *(out + offset)
6196     __ movl(out_reg, Address(out_reg, offset));
6197     __ MaybeUnpoisonHeapReference(out_reg);
6198   }
6199 }
6200 
6201 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
6202                                                                        Location out,
6203                                                                        Location obj,
6204                                                                        uint32_t offset,
6205                                                                        Location maybe_temp) {
6206   CpuRegister out_reg = out.AsRegister<CpuRegister>();
6207   CpuRegister obj_reg = obj.AsRegister<CpuRegister>();
6208   if (kEmitCompilerReadBarrier) {
6209     if (kUseBakerReadBarrier) {
6210       DCHECK(maybe_temp.IsRegister()) << maybe_temp;
6211       // Load with fast path based Baker's read barrier.
6212       // /* HeapReference<Object> */ out = *(obj + offset)
6213       codegen_->GenerateFieldLoadWithBakerReadBarrier(
6214           instruction, out, obj_reg, offset, maybe_temp, /* needs_null_check */ false);
6215     } else {
6216       // Load with slow path based read barrier.
6217       // /* HeapReference<Object> */ out = *(obj + offset)
6218       __ movl(out_reg, Address(obj_reg, offset));
6219       codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
6220     }
6221   } else {
6222     // Plain load with no read barrier.
6223     // /* HeapReference<Object> */ out = *(obj + offset)
6224     __ movl(out_reg, Address(obj_reg, offset));
6225     __ MaybeUnpoisonHeapReference(out_reg);
6226   }
6227 }
6228 
6229 void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(HInstruction* instruction,
6230                                                              Location root,
6231                                                              const Address& address,
6232                                                              Label* fixup_label) {
6233   CpuRegister root_reg = root.AsRegister<CpuRegister>();
6234   if (kEmitCompilerReadBarrier) {
6235     if (kUseBakerReadBarrier) {
6236       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
6237       //   Baker's read barriers are used:
6238       //
6239       //   root = *address;
6240       //   if (Thread::Current()->GetIsGcMarking()) {
6241       //     root = ReadBarrier::Mark(root)
6242       //   }
6243 
6244       // /* GcRoot<mirror::Object> */ root = *address
6245       __ movl(root_reg, address);
6246       if (fixup_label != nullptr) {
6247         __ Bind(fixup_label);
6248       }
6249       static_assert(
6250           sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
6251           "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
6252           "have different sizes.");
6253       static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
6254                     "art::mirror::CompressedReference<mirror::Object> and int32_t "
6255                     "have different sizes.");
6256 
6257       // Slow path used to mark the GC root `root`.
6258       SlowPathCode* slow_path =
6259           new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, root, root);
6260       codegen_->AddSlowPath(slow_path);
6261 
6262       __ gs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86_64WordSize>().Int32Value(),
6263                                       /* no_rip */ true),
6264                     Immediate(0));
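      // The gs-prefixed absolute address reads the thread's is-GC-marking flag
      // straight out of thread-local storage, so the mark slow path below is
      // only entered while the concurrent collector is actively marking.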
6265       __ j(kNotEqual, slow_path->GetEntryLabel());
6266       __ Bind(slow_path->GetExitLabel());
6267     } else {
6268       // GC root loaded through a slow path for read barriers other
6269       // than Baker's.
6270       // /* GcRoot<mirror::Object>* */ root = address
6271       __ leaq(root_reg, address);
6272       if (fixup_label != nullptr) {
6273         __ Bind(fixup_label);
6274       }
6275       // /* mirror::Object* */ root = root->Read()
6276       codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
6277     }
6278   } else {
6279     // Plain GC root load with no read barrier.
6280     // /* GcRoot<mirror::Object> */ root = *address
6281     __ movl(root_reg, address);
6282     if (fixup_label != nullptr) {
6283       __ Bind(fixup_label);
6284     }
6285     // Note that GC roots are not affected by heap poisoning, thus we
6286     // do not have to unpoison `root_reg` here.
6287   }
6288 }
6289 
6290 void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
6291                                                                 Location ref,
6292                                                                 CpuRegister obj,
6293                                                                 uint32_t offset,
6294                                                                 Location temp,
6295                                                                 bool needs_null_check) {
6296   DCHECK(kEmitCompilerReadBarrier);
6297   DCHECK(kUseBakerReadBarrier);
6298 
6299   // /* HeapReference<Object> */ ref = *(obj + offset)
6300   Address src(obj, offset);
6301   GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
6302 }
6303 
6304 void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
6305                                                                 Location ref,
6306                                                                 CpuRegister obj,
6307                                                                 uint32_t data_offset,
6308                                                                 Location index,
6309                                                                 Location temp,
6310                                                                 bool needs_null_check) {
6311   DCHECK(kEmitCompilerReadBarrier);
6312   DCHECK(kUseBakerReadBarrier);
6313 
6314   // /* HeapReference<Object> */ ref =
6315   //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
6316   Address src = index.IsConstant() ?
6317       Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset) :
6318       Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset);
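  // As a rough illustration of the address arithmetic above: with a constant
  // index of 3 and a data_offset of, say, 12 (a plausible first-element offset
  // for an array of 4-byte heap references), the slot is
  // obj + (3 << TIMES_4) + 12 = obj + 24; with the index in a register the
  // same slot is formed by the scaled-index mode [obj + index * 4 + 12].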
6319   GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
6320 }
6321 
6322 void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
6323                                                                     Location ref,
6324                                                                     CpuRegister obj,
6325                                                                     const Address& src,
6326                                                                     Location temp,
6327                                                                     bool needs_null_check) {
6328   DCHECK(kEmitCompilerReadBarrier);
6329   DCHECK(kUseBakerReadBarrier);
6330 
6331   // In slow path based read barriers, the read barrier call is
6332   // inserted after the original load. However, in fast path based
6333   // Baker's read barriers, we need to perform the load of
6334   // mirror::Object::monitor_ *before* the original reference load.
6335   // This load-load ordering is required by the read barrier.
6336   // The fast path/slow path (for Baker's algorithm) should look like:
6337   //
6338   //   uint32_t rb_state = LockWord(obj->monitor_).ReadBarrierState();
6339   //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
6340   //   HeapReference<Object> ref = *src;  // Original reference load.
6341   //   bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
6342   //   if (is_gray) {
6343   //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
6344   //   }
6345   //
6346   // Note: the original implementation in ReadBarrier::Barrier is
6347   // slightly more complex as:
6348   // - it implements the load-load fence using a data dependency on
6349   //   the high-bits of rb_state, which are expected to be all zeroes
6350   //   (we use CodeGeneratorX86_64::GenerateMemoryBarrier instead
6351   //   here, which is a no-op thanks to the x86-64 memory model);
6352   // - it performs additional checks that we do not do here for
6353   //   performance reasons.
6354 
6355   CpuRegister ref_reg = ref.AsRegister<CpuRegister>();
6356   CpuRegister temp_reg = temp.AsRegister<CpuRegister>();
6357   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
6358 
6359   // /* int32_t */ monitor = obj->monitor_
6360   __ movl(temp_reg, Address(obj, monitor_offset));
6361   if (needs_null_check) {
6362     MaybeRecordImplicitNullCheck(instruction);
6363   }
6364   // /* LockWord */ lock_word = LockWord(monitor)
6365   static_assert(sizeof(LockWord) == sizeof(int32_t),
6366                 "art::LockWord and int32_t have different sizes.");
6367   // /* uint32_t */ rb_state = lock_word.ReadBarrierState()
6368   __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift));
6369   __ andl(temp_reg, Immediate(LockWord::kReadBarrierStateMask));
6370   static_assert(
6371       LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_,
6372       "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_.");
6373 
6374   // Load fence to prevent load-load reordering.
6375   // Note that this is a no-op, thanks to the x86-64 memory model.
6376   GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
6377 
6378   // The actual reference load.
6379   // /* HeapReference<Object> */ ref = *src
6380   __ movl(ref_reg, src);
6381 
6382   // Object* ref = ref_addr->AsMirrorPtr()
6383   __ MaybeUnpoisonHeapReference(ref_reg);
6384 
6385   // Slow path used to mark the object `ref` when it is gray.
6386   SlowPathCode* slow_path =
6387       new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, ref, ref);
6388   AddSlowPath(slow_path);
6389 
6390   // if (rb_state == ReadBarrier::gray_ptr_)
6391   //   ref = ReadBarrier::Mark(ref);
6392   __ cmpl(temp_reg, Immediate(ReadBarrier::gray_ptr_));
6393   __ j(kEqual, slow_path->GetEntryLabel());
6394   __ Bind(slow_path->GetExitLabel());
6395 }
6396 
6397 void CodeGeneratorX86_64::GenerateReadBarrierSlow(HInstruction* instruction,
6398                                                   Location out,
6399                                                   Location ref,
6400                                                   Location obj,
6401                                                   uint32_t offset,
6402                                                   Location index) {
6403   DCHECK(kEmitCompilerReadBarrier);
6404 
6405   // Insert a slow path based read barrier *after* the reference load.
6406   //
6407   // If heap poisoning is enabled, the unpoisoning of the loaded
6408   // reference will be carried out by the runtime within the slow
6409   // path.
6410   //
6411   // Note that `ref` currently does not get unpoisoned (when heap
6412   // poisoning is enabled), which is alright as the `ref` argument is
6413   // not used by the artReadBarrierSlow entry point.
6414   //
6415   // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
6416   SlowPathCode* slow_path = new (GetGraph()->GetArena())
6417       ReadBarrierForHeapReferenceSlowPathX86_64(instruction, out, ref, obj, offset, index);
6418   AddSlowPath(slow_path);
6419 
6420   __ jmp(slow_path->GetEntryLabel());
6421   __ Bind(slow_path->GetExitLabel());
6422 }
6423 
6424 void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
6425                                                        Location out,
6426                                                        Location ref,
6427                                                        Location obj,
6428                                                        uint32_t offset,
6429                                                        Location index) {
6430   if (kEmitCompilerReadBarrier) {
6431     // Baker's read barriers shall be handled by the fast path
6432     // (CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier).
6433     DCHECK(!kUseBakerReadBarrier);
6434     // If heap poisoning is enabled, unpoisoning will be taken care of
6435     // by the runtime within the slow path.
6436     GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
6437   } else if (kPoisonHeapReferences) {
6438     __ UnpoisonHeapReference(out.AsRegister<CpuRegister>());
6439   }
6440 }
6441 
6442 void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
6443                                                          Location out,
6444                                                          Location root) {
6445   DCHECK(kEmitCompilerReadBarrier);
6446 
6447   // Insert a slow path based read barrier *after* the GC root load.
6448   //
6449   // Note that GC roots are not affected by heap poisoning, so we do
6450   // not need to do anything special for this here.
6451   SlowPathCode* slow_path =
6452       new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathX86_64(instruction, out, root);
6453   AddSlowPath(slow_path);
6454 
6455   __ jmp(slow_path->GetEntryLabel());
6456   __ Bind(slow_path->GetExitLabel());
6457 }
6458 
6459 void LocationsBuilderX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
6460   // Nothing to do, this should be removed during prepare for register allocator.
6461   LOG(FATAL) << "Unreachable";
6462 }
6463 
6464 void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
6465   // Nothing to do, this should be removed during prepare for register allocator.
6466   LOG(FATAL) << "Unreachable";
6467 }
6468 
6469 // Simple implementation of packed switch - generate cascaded compare/jumps.
6470 void LocationsBuilderX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
6471   LocationSummary* locations =
6472       new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
6473   locations->SetInAt(0, Location::RequiresRegister());
6474   locations->AddTemp(Location::RequiresRegister());
6475   locations->AddTemp(Location::RequiresRegister());
6476 }
6477 
6478 void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
6479   int32_t lower_bound = switch_instr->GetStartValue();
6480   uint32_t num_entries = switch_instr->GetNumEntries();
6481   LocationSummary* locations = switch_instr->GetLocations();
6482   CpuRegister value_reg_in = locations->InAt(0).AsRegister<CpuRegister>();
6483   CpuRegister temp_reg = locations->GetTemp(0).AsRegister<CpuRegister>();
6484   CpuRegister base_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
6485   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
6486 
6487   // Should we generate smaller inline compare/jumps?
6488   if (num_entries <= kPackedSwitchJumpTableThreshold) {
6489     // Figure out the correct compare values and jump conditions.
6490     // Handle the first compare/branch as a special case because it might
6491     // jump to the default case.
6492     DCHECK_GT(num_entries, 2u);
6493     Condition first_condition;
6494     uint32_t index;
6495     const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
6496     if (lower_bound != 0) {
6497       first_condition = kLess;
6498       __ cmpl(value_reg_in, Immediate(lower_bound));
6499       __ j(first_condition, codegen_->GetLabelOf(default_block));
6500       __ j(kEqual, codegen_->GetLabelOf(successors[0]));
6501 
6502       index = 1;
6503     } else {
6504       // Handle all the compare/jumps below.
6505       first_condition = kBelow;
6506       index = 0;
6507     }
6508 
6509     // Handle the rest of the compare/jumps.
6510     for (; index + 1 < num_entries; index += 2) {
6511       int32_t compare_to_value = lower_bound + index + 1;
6512       __ cmpl(value_reg_in, Immediate(compare_to_value));
6513       // Jump to successors[index] if value < case_value[index].
6514       __ j(first_condition, codegen_->GetLabelOf(successors[index]));
6515       // Jump to successors[index + 1] if value == case_value[index + 1].
6516       __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
6517     }
6518 
6519     if (index != num_entries) {
6520       // There are an odd number of entries. Handle the last one.
6521       DCHECK_EQ(index + 1, num_entries);
6522       __ cmpl(value_reg_in, Immediate(static_cast<int32_t>(lower_bound + index)));
6523       __ j(kEqual, codegen_->GetLabelOf(successors[index]));
6524     }
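    // Rough shape of the sequence emitted above for, say, lower_bound == 100
    // and num_entries == 4 (cases 100..103):
    //   cmpl $100, value ; jl default        ; je successors[0]
    //   cmpl $102, value ; jl successors[1]  ; je successors[2]
    //   cmpl $103, value ; je successors[3]
    // followed by the unconditional jump to the default block added below if
    // it is not the fall-through block.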
6525 
6526     // And the default for any other value.
6527     if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
6528       __ jmp(codegen_->GetLabelOf(default_block));
6529     }
6530     return;
6531   }
6532 
6533   // Remove the bias, if needed.
6534   Register value_reg_out = value_reg_in.AsRegister();
6535   if (lower_bound != 0) {
6536     __ leal(temp_reg, Address(value_reg_in, -lower_bound));
6537     value_reg_out = temp_reg.AsRegister();
6538   }
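  // For example, with lower_bound == 100 the leal above maps case values
  // 100 .. 100 + num_entries - 1 onto table indices 0 .. num_entries - 1, so
  // the single unsigned compare below also sends values under the range (which
  // wrap around to large unsigned numbers) to the default block.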
6539   CpuRegister value_reg(value_reg_out);
6540 
6541   // Is the value in range?
6542   __ cmpl(value_reg, Immediate(num_entries - 1));
6543   __ j(kAbove, codegen_->GetLabelOf(default_block));
6544 
6545   // We are in the range of the table.
6546   // Load the address of the jump table in the constant area.
6547   __ leaq(base_reg, codegen_->LiteralCaseTable(switch_instr));
6548 
6549   // Load the (signed) offset from the jump table.
6550   __ movsxd(temp_reg, Address(base_reg, value_reg, TIMES_4, 0));
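  // Each table entry is a 32-bit offset of a case block relative to the table
  // itself (see JumpTableRIPFixup::CreateJumpTable below), so sign-extending
  // the entry and adding base_reg back reconstructs the absolute jump target.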
6551 
6552   // Add the offset to the address of the table base.
6553   __ addq(temp_reg, base_reg);
6554 
6555   // And jump.
6556   __ jmp(temp_reg);
6557 }
6558 
6559 void CodeGeneratorX86_64::Load32BitValue(CpuRegister dest, int32_t value) {
6560   if (value == 0) {
6561     __ xorl(dest, dest);
6562   } else {
6563     __ movl(dest, Immediate(value));
6564   }
6565 }
6566 
6567 void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) {
6568   if (value == 0) {
6569     // Clears upper bits too.
6570     __ xorl(dest, dest);
6571   } else if (IsUint<32>(value)) {
6572     // We can use a 32 bit move, as it will zero-extend and is shorter.
6573     __ movl(dest, Immediate(static_cast<int32_t>(value)));
6574   } else {
6575     __ movq(dest, Immediate(value));
6576   }
6577 }
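// A quick sketch of the cases above: 0 becomes xorl (the 32-bit xor also
// clears the upper half of the register); 0x00000000FFFFFFFF passes IsUint<32>
// and becomes a zero-extending movl; -1 does not, so it needs the full movq
// with a 64-bit immediate.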
6578 
6579 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, int32_t value) {
6580   if (value == 0) {
6581     __ xorps(dest, dest);
6582   } else {
6583     __ movss(dest, LiteralInt32Address(value));
6584   }
6585 }
6586 
6587 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, int64_t value) {
6588   if (value == 0) {
6589     __ xorpd(dest, dest);
6590   } else {
6591     __ movsd(dest, LiteralInt64Address(value));
6592   }
6593 }
6594 
6595 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, float value) {
6596   Load32BitValue(dest, bit_cast<int32_t, float>(value));
6597 }
6598 
6599 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, double value) {
6600   Load64BitValue(dest, bit_cast<int64_t, double>(value));
6601 }
6602 
6603 void CodeGeneratorX86_64::Compare32BitValue(CpuRegister dest, int32_t value) {
6604   if (value == 0) {
6605     __ testl(dest, dest);
6606   } else {
6607     __ cmpl(dest, Immediate(value));
6608   }
6609 }
6610 
6611 void CodeGeneratorX86_64::Compare64BitValue(CpuRegister dest, int64_t value) {
6612   if (IsInt<32>(value)) {
6613     if (value == 0) {
6614       __ testq(dest, dest);
6615     } else {
6616       __ cmpq(dest, Immediate(static_cast<int32_t>(value)));
6617     }
6618   } else {
6619     // Value won't fit in an int.
6620     __ cmpq(dest, LiteralInt64Address(value));
6621   }
6622 }
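// Sketch of the three paths above: comparing against 0 uses a short
// testq reg, reg; against a value such as 7 an inline-immediate cmpq; and
// against a value such as 0x123456789 (not a sign-extended int32) a
// RIP-relative cmpq whose literal lives in the constant area emitted in
// Finalize() below.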
6623 
6624 void CodeGeneratorX86_64::Store64BitValueToStack(Location dest, int64_t value) {
6625   DCHECK(dest.IsDoubleStackSlot());
6626   if (IsInt<32>(value)) {
6627     // Can move directly as an int32 constant.
6628     __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()),
6629             Immediate(static_cast<int32_t>(value)));
6630   } else {
6631     Load64BitValue(CpuRegister(TMP), value);
6632     __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()), CpuRegister(TMP));
6633   }
6634 }
6635 
6636 /**
6637  * Class to handle late fixup of offsets into constant area.
6638  */
6639 class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
6640  public:
6641   RIPFixup(CodeGeneratorX86_64& codegen, size_t offset)
6642       : codegen_(&codegen), offset_into_constant_area_(offset) {}
6643 
6644  protected:
6645   void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }
6646 
6647   CodeGeneratorX86_64* codegen_;
6648 
6649  private:
6650   void Process(const MemoryRegion& region, int pos) OVERRIDE {
6651     // Patch the correct offset for the instruction.  We use the address of the
6652     // 'next' instruction, which is 'pos' (patch the 4 bytes before).
6653     int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
6654     int32_t relative_position = constant_offset - pos;
6655 
6656     // Patch in the right value.
6657     region.StoreUnaligned<int32_t>(pos - 4, relative_position);
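    // Worked example: if the constant area starts at code offset 0x200, this
    // literal sits 8 bytes into it, and 'pos' is 0x40, then the displacement
    // stored at 0x3C is 0x200 + 8 - 0x40 = 0x1C8.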
6658   }
6659 
6660   // Location in constant area that the fixup refers to.
6661   size_t offset_into_constant_area_;
6662 };
6663 
6664 /**
6665  * Class to handle late fixup of offsets to a jump table that will be created in the
6666  * constant area.
6667  */
6668 class JumpTableRIPFixup : public RIPFixup {
6669  public:
6670   JumpTableRIPFixup(CodeGeneratorX86_64& codegen, HPackedSwitch* switch_instr)
6671       : RIPFixup(codegen, -1), switch_instr_(switch_instr) {}
6672 
6673   void CreateJumpTable() {
6674     X86_64Assembler* assembler = codegen_->GetAssembler();
6675 
6676     // Ensure that the reference to the jump table has the correct offset.
6677     const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
6678     SetOffset(offset_in_constant_table);
6679 
6680     // Compute the offset from the start of the function to this jump table.
6681     const int32_t current_table_offset = assembler->CodeSize() + offset_in_constant_table;
6682 
6683     // Populate the jump table with the offset of each case's target block.
6684     int32_t num_entries = switch_instr_->GetNumEntries();
6685     HBasicBlock* block = switch_instr_->GetBlock();
6686     const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
6687     // The value that we want is the target offset - the position of the table.
6688     for (int32_t i = 0; i < num_entries; i++) {
6689       HBasicBlock* b = successors[i];
6690       Label* l = codegen_->GetLabelOf(b);
6691       DCHECK(l->IsBound());
6692       int32_t offset_to_block = l->Position() - current_table_offset;
6693       assembler->AppendInt32(offset_to_block);
6694     }
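    // For instance, a case block whose label was bound at code offset 0x80,
    // with the table itself landing at offset 0x200, gets the entry
    // 0x80 - 0x200 = -0x180; VisitPackedSwitch adds the table address back
    // after the movsxd to recover the absolute branch target.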
6695   }
6696 
6697  private:
6698   const HPackedSwitch* switch_instr_;
6699 };
6700 
6701 void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) {
6702   // Generate the constant area if needed.
6703   X86_64Assembler* assembler = GetAssembler();
6704   if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
6705     // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8 byte values.
6706     assembler->Align(4, 0);
6707     constant_area_start_ = assembler->CodeSize();
6708 
6709     // Populate any jump tables.
6710     for (auto jump_table : fixups_to_jump_tables_) {
6711       jump_table->CreateJumpTable();
6712     }
6713 
6714     // And now add the constant area to the generated code.
6715     assembler->AddConstantArea();
6716   }
6717 
6718   // And finish up.
6719   CodeGenerator::Finalize(allocator);
6720 }
6721 
6722 Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) {
6723   AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddDouble(v));
6724   return Address::RIP(fixup);
6725 }
6726 
6727 Address CodeGeneratorX86_64::LiteralFloatAddress(float v) {
6728   AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddFloat(v));
6729   return Address::RIP(fixup);
6730 }
6731 
6732 Address CodeGeneratorX86_64::LiteralInt32Address(int32_t v) {
6733   AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddInt32(v));
6734   return Address::RIP(fixup);
6735 }
6736 
6737 Address CodeGeneratorX86_64::LiteralInt64Address(int64_t v) {
6738   AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddInt64(v));
6739   return Address::RIP(fixup);
6740 }
6741 
6742 // TODO: trg as memory.
6743 void CodeGeneratorX86_64::MoveFromReturnRegister(Location trg, Primitive::Type type) {
6744   if (!trg.IsValid()) {
6745     DCHECK_EQ(type, Primitive::kPrimVoid);
6746     return;
6747   }
6748 
6749   DCHECK_NE(type, Primitive::kPrimVoid);
6750 
6751   Location return_loc = InvokeDexCallingConventionVisitorX86_64().GetReturnLocation(type);
6752   if (trg.Equals(return_loc)) {
6753     return;
6754   }
6755 
6756   // Let the parallel move resolver take care of all of this.
6757   HParallelMove parallel_move(GetGraph()->GetArena());
6758   parallel_move.AddMove(return_loc, trg, type, nullptr);
6759   GetMoveResolver()->EmitNativeCode(&parallel_move);
6760 }
6761 
6762 Address CodeGeneratorX86_64::LiteralCaseTable(HPackedSwitch* switch_instr) {
6763   // Create a fixup to be used to create and address the jump table.
6764   JumpTableRIPFixup* table_fixup =
6765       new (GetGraph()->GetArena()) JumpTableRIPFixup(*this, switch_instr);
6766 
6767   // We have to populate the jump tables.
6768   fixups_to_jump_tables_.push_back(table_fixup);
6769   return Address::RIP(table_fixup);
6770 }
6771 
6772 void CodeGeneratorX86_64::MoveInt64ToAddress(const Address& addr_low,
6773                                              const Address& addr_high,
6774                                              int64_t v,
6775                                              HInstruction* instruction) {
6776   if (IsInt<32>(v)) {
6777     int32_t v_32 = v;
6778     __ movq(addr_low, Immediate(v_32));
6779     MaybeRecordImplicitNullCheck(instruction);
6780   } else {
6781     // Didn't fit in a sign-extended 32-bit immediate.  Do it in pieces.
6782     int32_t low_v = Low32Bits(v);
6783     int32_t high_v = High32Bits(v);
6784     __ movl(addr_low, Immediate(low_v));
6785     MaybeRecordImplicitNullCheck(instruction);
6786     __ movl(addr_high, Immediate(high_v));
6787   }
6788 }
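// Sketch of the split path above: v == 0x1122334455667788 is not a
// sign-extended int32, so it is written as movl $0x55667788 to addr_low
// followed by movl $0x11223344 to addr_high; only the first store is recorded
// for the implicit null check, since it is the access expected to fault on a
// null object.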
6789 
6790 #undef __
6791 
6792 }  // namespace x86_64
6793 }  // namespace art
6794