1 /*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "code_generator_x86_64.h"
18
19 #include "art_method.h"
20 #include "class_table.h"
21 #include "code_generator_utils.h"
22 #include "compiled_method.h"
23 #include "entrypoints/quick/quick_entrypoints.h"
24 #include "gc/accounting/card_table.h"
25 #include "heap_poisoning.h"
26 #include "intrinsics.h"
27 #include "intrinsics_x86_64.h"
28 #include "linker/linker_patch.h"
29 #include "lock_word.h"
30 #include "mirror/array-inl.h"
31 #include "mirror/class-inl.h"
32 #include "mirror/object_reference.h"
33 #include "thread.h"
34 #include "utils/assembler.h"
35 #include "utils/stack_checks.h"
36 #include "utils/x86_64/assembler_x86_64.h"
37 #include "utils/x86_64/managed_register_x86_64.h"
38
39 namespace art {
40
41 template<class MirrorType>
42 class GcRoot;
43
44 namespace x86_64 {
45
46 static constexpr int kCurrentMethodStackOffset = 0;
47 static constexpr Register kMethodRegisterArgument = RDI;
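// The compiled method receives its own ArtMethod* in RDI (the first argument register of the
// managed calling convention) and, when required, spills it at the bottom of the frame,
// i.e. at offset 0 from RSP (see GenerateFrameEntry below).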
// The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump
// table version generates 7 instructions and num_entries literals. The compare/jump sequence
// generates less code/data with a small num_entries.
51 static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;
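// Rough sketch (not the exact emitted code): the compare/jump form is a short run of
// "cmpl $k, %reg; je case_k" pairs, one per entry, while the jump-table form materializes
// the table address, loads the 32-bit offset for the switch value, adds it to the table
// base and performs a single indirect jmp, plus num_entries offsets emitted as data.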
52
53 static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 };
54 static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 };
55
56 static constexpr int kC2ConditionMask = 0x400;
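// 0x400 is bit 10 of the x87 FPU status word, i.e. the C2 condition flag.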
57
58 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
59 #define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())-> // NOLINT
60 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize, x).Int32Value()
61
62 class NullCheckSlowPathX86_64 : public SlowPathCode {
63 public:
  explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
67 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
68 __ Bind(GetEntryLabel());
69 if (instruction_->CanThrowIntoCatchBlock()) {
70 // Live registers will be restored in the catch block if caught.
71 SaveLiveRegisters(codegen, instruction_->GetLocations());
72 }
73 x86_64_codegen->InvokeRuntime(kQuickThrowNullPointer,
74 instruction_,
75 instruction_->GetDexPc(),
76 this);
77 CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
78 }
79
  bool IsFatal() const OVERRIDE { return true; }

  const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathX86_64"; }
83
84 private:
85 DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86_64);
86 };
87
88 class DivZeroCheckSlowPathX86_64 : public SlowPathCode {
89 public:
  explicit DivZeroCheckSlowPathX86_64(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
93 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
94 __ Bind(GetEntryLabel());
95 x86_64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
96 CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
97 }
98
  bool IsFatal() const OVERRIDE { return true; }

  const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathX86_64"; }
102
103 private:
104 DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86_64);
105 };
106
107 class DivRemMinusOneSlowPathX86_64 : public SlowPathCode {
108 public:
  DivRemMinusOneSlowPathX86_64(HInstruction* at, Register reg, DataType::Type type, bool is_div)
      : SlowPathCode(at), cpu_reg_(CpuRegister(reg)), type_(type), is_div_(is_div) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
113 __ Bind(GetEntryLabel());
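    // This slow path handles the MIN_VALUE / -1 case that the idiv instruction would fault on:
    // the quotient equals the dividend negated (which wraps back to MIN_VALUE) and the
    // remainder is 0, so both results can be produced without performing the division.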
114 if (type_ == DataType::Type::kInt32) {
115 if (is_div_) {
116 __ negl(cpu_reg_);
117 } else {
118 __ xorl(cpu_reg_, cpu_reg_);
119 }
120
121 } else {
122 DCHECK_EQ(DataType::Type::kInt64, type_);
123 if (is_div_) {
124 __ negq(cpu_reg_);
125 } else {
126 __ xorl(cpu_reg_, cpu_reg_);
127 }
128 }
129 __ jmp(GetExitLabel());
130 }
131
  const char* GetDescription() const OVERRIDE { return "DivRemMinusOneSlowPathX86_64"; }
133
134 private:
135 const CpuRegister cpu_reg_;
136 const DataType::Type type_;
137 const bool is_div_;
138 DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86_64);
139 };
140
141 class SuspendCheckSlowPathX86_64 : public SlowPathCode {
142 public:
  SuspendCheckSlowPathX86_64(HSuspendCheck* instruction, HBasicBlock* successor)
      : SlowPathCode(instruction), successor_(successor) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
147 LocationSummary* locations = instruction_->GetLocations();
148 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
149 __ Bind(GetEntryLabel());
150 SaveLiveRegisters(codegen, locations); // Only saves full width XMM for SIMD.
151 x86_64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
152 CheckEntrypointTypes<kQuickTestSuspend, void, void>();
153 RestoreLiveRegisters(codegen, locations); // Only restores full width XMM for SIMD.
154 if (successor_ == nullptr) {
155 __ jmp(GetReturnLabel());
156 } else {
157 __ jmp(x86_64_codegen->GetLabelOf(successor_));
158 }
159 }
160
  Label* GetReturnLabel() {
162 DCHECK(successor_ == nullptr);
163 return &return_label_;
164 }
165
  HBasicBlock* GetSuccessor() const {
167 return successor_;
168 }
169
  const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathX86_64"; }
171
172 private:
173 HBasicBlock* const successor_;
174 Label return_label_;
175
176 DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86_64);
177 };
178
179 class BoundsCheckSlowPathX86_64 : public SlowPathCode {
180 public:
  explicit BoundsCheckSlowPathX86_64(HBoundsCheck* instruction)
      : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
185 LocationSummary* locations = instruction_->GetLocations();
186 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
187 __ Bind(GetEntryLabel());
188 if (instruction_->CanThrowIntoCatchBlock()) {
189 // Live registers will be restored in the catch block if caught.
190 SaveLiveRegisters(codegen, instruction_->GetLocations());
191 }
192 // Are we using an array length from memory?
193 HInstruction* array_length = instruction_->InputAt(1);
194 Location length_loc = locations->InAt(1);
195 InvokeRuntimeCallingConvention calling_convention;
196 if (array_length->IsArrayLength() && array_length->IsEmittedAtUseSite()) {
197 // Load the array length into our temporary.
198 HArrayLength* length = array_length->AsArrayLength();
199 uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(length);
200 Location array_loc = array_length->GetLocations()->InAt(0);
201 Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
202 length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
203 // Check for conflicts with index.
204 if (length_loc.Equals(locations->InAt(0))) {
205 // We know we aren't using parameter 2.
206 length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2));
207 }
208 __ movl(length_loc.AsRegister<CpuRegister>(), array_len);
209 if (mirror::kUseStringCompression && length->IsStringLength()) {
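        // For compressed strings the count field packs the character count together with the
        // compression flag in its low bit, so shift right by one to recover the length.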
210 __ shrl(length_loc.AsRegister<CpuRegister>(), Immediate(1));
211 }
212 }
213
214 // We're moving two locations to locations that could overlap, so we need a parallel
215 // move resolver.
216 codegen->EmitParallelMoves(
217 locations->InAt(0),
218 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
219 DataType::Type::kInt32,
220 length_loc,
221 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
222 DataType::Type::kInt32);
223 QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
224 ? kQuickThrowStringBounds
225 : kQuickThrowArrayBounds;
226 x86_64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
227 CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
228 CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
229 }
230
  bool IsFatal() const OVERRIDE { return true; }

  const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathX86_64"; }
234
235 private:
236 DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86_64);
237 };
238
239 class LoadClassSlowPathX86_64 : public SlowPathCode {
240 public:
  LoadClassSlowPathX86_64(HLoadClass* cls,
242 HInstruction* at,
243 uint32_t dex_pc,
244 bool do_clinit)
245 : SlowPathCode(at), cls_(cls), dex_pc_(dex_pc), do_clinit_(do_clinit) {
246 DCHECK(at->IsLoadClass() || at->IsClinitCheck());
247 }
248
  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
250 LocationSummary* locations = instruction_->GetLocations();
251 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
252 __ Bind(GetEntryLabel());
253
254 SaveLiveRegisters(codegen, locations);
255
256 // Custom calling convention: RAX serves as both input and output.
257 __ movl(CpuRegister(RAX), Immediate(cls_->GetTypeIndex().index_));
258 x86_64_codegen->InvokeRuntime(do_clinit_ ? kQuickInitializeStaticStorage : kQuickInitializeType,
259 instruction_,
260 dex_pc_,
261 this);
262 if (do_clinit_) {
263 CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
264 } else {
265 CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
266 }
267
268 Location out = locations->Out();
269 // Move the class to the desired location.
270 if (out.IsValid()) {
271 DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
272 x86_64_codegen->Move(out, Location::RegisterLocation(RAX));
273 }
274
275 RestoreLiveRegisters(codegen, locations);
276 __ jmp(GetExitLabel());
277 }
278
  const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathX86_64"; }
280
281 private:
282 // The class this slow path will load.
283 HLoadClass* const cls_;
284
285 // The dex PC of `at_`.
286 const uint32_t dex_pc_;
287
288 // Whether to initialize the class.
289 const bool do_clinit_;
290
291 DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86_64);
292 };
293
294 class LoadStringSlowPathX86_64 : public SlowPathCode {
295 public:
  explicit LoadStringSlowPathX86_64(HLoadString* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
299 LocationSummary* locations = instruction_->GetLocations();
300 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
301
302 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
303 __ Bind(GetEntryLabel());
304 SaveLiveRegisters(codegen, locations);
305
306 const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
307 // Custom calling convention: RAX serves as both input and output.
308 __ movl(CpuRegister(RAX), Immediate(string_index.index_));
309 x86_64_codegen->InvokeRuntime(kQuickResolveString,
310 instruction_,
311 instruction_->GetDexPc(),
312 this);
313 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
314 x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
315 RestoreLiveRegisters(codegen, locations);
316
317 __ jmp(GetExitLabel());
318 }
319
  const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathX86_64"; }
321
322 private:
323 DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86_64);
324 };
325
326 class TypeCheckSlowPathX86_64 : public SlowPathCode {
327 public:
  TypeCheckSlowPathX86_64(HInstruction* instruction, bool is_fatal)
      : SlowPathCode(instruction), is_fatal_(is_fatal) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
332 LocationSummary* locations = instruction_->GetLocations();
333 uint32_t dex_pc = instruction_->GetDexPc();
334 DCHECK(instruction_->IsCheckCast()
335 || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
336
337 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
338 __ Bind(GetEntryLabel());
339
340 if (kPoisonHeapReferences &&
341 instruction_->IsCheckCast() &&
342 instruction_->AsCheckCast()->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) {
343 // First, unpoison the `cls` reference that was poisoned for direct memory comparison.
344 __ UnpoisonHeapReference(locations->InAt(1).AsRegister<CpuRegister>());
345 }
346
347 if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
348 SaveLiveRegisters(codegen, locations);
349 }
350
351 // We're moving two locations to locations that could overlap, so we need a parallel
352 // move resolver.
353 InvokeRuntimeCallingConvention calling_convention;
354 codegen->EmitParallelMoves(locations->InAt(0),
355 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
356 DataType::Type::kReference,
357 locations->InAt(1),
358 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
359 DataType::Type::kReference);
360 if (instruction_->IsInstanceOf()) {
361 x86_64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
362 CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
363 } else {
364 DCHECK(instruction_->IsCheckCast());
365 x86_64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
366 CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
367 }
368
369 if (!is_fatal_) {
370 if (instruction_->IsInstanceOf()) {
371 x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
372 }
373
374 RestoreLiveRegisters(codegen, locations);
375 __ jmp(GetExitLabel());
376 }
377 }
378
  const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathX86_64"; }

  bool IsFatal() const OVERRIDE { return is_fatal_; }
382
383 private:
384 const bool is_fatal_;
385
386 DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86_64);
387 };
388
389 class DeoptimizationSlowPathX86_64 : public SlowPathCode {
390 public:
  explicit DeoptimizationSlowPathX86_64(HDeoptimize* instruction)
      : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
395 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
396 __ Bind(GetEntryLabel());
397 LocationSummary* locations = instruction_->GetLocations();
398 SaveLiveRegisters(codegen, locations);
399 InvokeRuntimeCallingConvention calling_convention;
400 x86_64_codegen->Load32BitValue(
401 CpuRegister(calling_convention.GetRegisterAt(0)),
402 static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
403 x86_64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
404 CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
405 }
406
  const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86_64"; }
408
409 private:
410 DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64);
411 };
412
413 class ArraySetSlowPathX86_64 : public SlowPathCode {
414 public:
  explicit ArraySetSlowPathX86_64(HInstruction* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
418 LocationSummary* locations = instruction_->GetLocations();
419 __ Bind(GetEntryLabel());
420 SaveLiveRegisters(codegen, locations);
421
422 InvokeRuntimeCallingConvention calling_convention;
423 HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
424 parallel_move.AddMove(
425 locations->InAt(0),
426 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
427 DataType::Type::kReference,
428 nullptr);
429 parallel_move.AddMove(
430 locations->InAt(1),
431 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
432 DataType::Type::kInt32,
433 nullptr);
434 parallel_move.AddMove(
435 locations->InAt(2),
436 Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
437 DataType::Type::kReference,
438 nullptr);
    codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
440
441 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
442 x86_64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
443 CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
444 RestoreLiveRegisters(codegen, locations);
445 __ jmp(GetExitLabel());
446 }
447
  const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathX86_64"; }
449
450 private:
451 DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64);
452 };
453
454 // Slow path marking an object reference `ref` during a read
455 // barrier. The field `obj.field` in the object `obj` holding this
456 // reference does not get updated by this slow path after marking (see
457 // ReadBarrierMarkAndUpdateFieldSlowPathX86_64 below for that).
458 //
459 // This means that after the execution of this slow path, `ref` will
460 // always be up-to-date, but `obj.field` may not; i.e., after the
461 // flip, `ref` will be a to-space reference, but `obj.field` will
462 // probably still be a from-space reference (unless it gets updated by
463 // another thread, or if another thread installed another object
464 // reference (different from `ref`) in `obj.field`).
465 class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
466 public:
  ReadBarrierMarkSlowPathX86_64(HInstruction* instruction,
468 Location ref,
469 bool unpoison_ref_before_marking)
470 : SlowPathCode(instruction),
471 ref_(ref),
472 unpoison_ref_before_marking_(unpoison_ref_before_marking) {
473 DCHECK(kEmitCompilerReadBarrier);
474 }
475
  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathX86_64"; }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
479 LocationSummary* locations = instruction_->GetLocations();
480 CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
481 Register ref_reg = ref_cpu_reg.AsRegister();
482 DCHECK(locations->CanCall());
483 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
484 DCHECK(instruction_->IsInstanceFieldGet() ||
485 instruction_->IsStaticFieldGet() ||
486 instruction_->IsArrayGet() ||
487 instruction_->IsArraySet() ||
488 instruction_->IsLoadClass() ||
489 instruction_->IsLoadString() ||
490 instruction_->IsInstanceOf() ||
491 instruction_->IsCheckCast() ||
492 (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
493 (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
494 << "Unexpected instruction in read barrier marking slow path: "
495 << instruction_->DebugName();
496
497 __ Bind(GetEntryLabel());
498 if (unpoison_ref_before_marking_) {
499 // Object* ref = ref_addr->AsMirrorPtr()
500 __ MaybeUnpoisonHeapReference(ref_cpu_reg);
501 }
502 // No need to save live registers; it's taken care of by the
503 // entrypoint. Also, there is no need to update the stack mask,
504 // as this runtime call will not trigger a garbage collection.
505 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
506 DCHECK_NE(ref_reg, RSP);
507 DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
508 // "Compact" slow path, saving two moves.
509 //
510 // Instead of using the standard runtime calling convention (input
511 // and output in R0):
512 //
513 // RDI <- ref
514 // RAX <- ReadBarrierMark(RDI)
515 // ref <- RAX
516 //
517 // we just use rX (the register containing `ref`) as input and output
518 // of a dedicated entrypoint:
519 //
520 // rX <- ReadBarrierMarkRegX(rX)
521 //
522 int32_t entry_point_offset =
523 Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
524 // This runtime call does not require a stack map.
525 x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
526 __ jmp(GetExitLabel());
527 }
528
529 private:
530 // The location (register) of the marked object reference.
531 const Location ref_;
532 // Should the reference in `ref_` be unpoisoned prior to marking it?
533 const bool unpoison_ref_before_marking_;
534
535 DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64);
536 };
537
538 // Slow path marking an object reference `ref` during a read barrier,
539 // and if needed, atomically updating the field `obj.field` in the
540 // object `obj` holding this reference after marking (contrary to
541 // ReadBarrierMarkSlowPathX86_64 above, which never tries to update
542 // `obj.field`).
543 //
544 // This means that after the execution of this slow path, both `ref`
545 // and `obj.field` will be up-to-date; i.e., after the flip, both will
546 // hold the same to-space reference (unless another thread installed
547 // another object reference (different from `ref`) in `obj.field`).
548 class ReadBarrierMarkAndUpdateFieldSlowPathX86_64 : public SlowPathCode {
549 public:
  ReadBarrierMarkAndUpdateFieldSlowPathX86_64(HInstruction* instruction,
551 Location ref,
552 CpuRegister obj,
553 const Address& field_addr,
554 bool unpoison_ref_before_marking,
555 CpuRegister temp1,
556 CpuRegister temp2)
557 : SlowPathCode(instruction),
558 ref_(ref),
559 obj_(obj),
560 field_addr_(field_addr),
561 unpoison_ref_before_marking_(unpoison_ref_before_marking),
562 temp1_(temp1),
563 temp2_(temp2) {
564 DCHECK(kEmitCompilerReadBarrier);
565 }
566
  const char* GetDescription() const OVERRIDE {
568 return "ReadBarrierMarkAndUpdateFieldSlowPathX86_64";
569 }
570
  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
572 LocationSummary* locations = instruction_->GetLocations();
573 CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
574 Register ref_reg = ref_cpu_reg.AsRegister();
575 DCHECK(locations->CanCall());
576 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
577 // This slow path is only used by the UnsafeCASObject intrinsic.
578 DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
579 << "Unexpected instruction in read barrier marking and field updating slow path: "
580 << instruction_->DebugName();
581 DCHECK(instruction_->GetLocations()->Intrinsified());
582 DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
583
584 __ Bind(GetEntryLabel());
585 if (unpoison_ref_before_marking_) {
586 // Object* ref = ref_addr->AsMirrorPtr()
587 __ MaybeUnpoisonHeapReference(ref_cpu_reg);
588 }
589
590 // Save the old (unpoisoned) reference.
591 __ movl(temp1_, ref_cpu_reg);
592
593 // No need to save live registers; it's taken care of by the
594 // entrypoint. Also, there is no need to update the stack mask,
595 // as this runtime call will not trigger a garbage collection.
596 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
597 DCHECK_NE(ref_reg, RSP);
598 DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
599 // "Compact" slow path, saving two moves.
600 //
601 // Instead of using the standard runtime calling convention (input
602 // and output in R0):
603 //
604 // RDI <- ref
605 // RAX <- ReadBarrierMark(RDI)
606 // ref <- RAX
607 //
608 // we just use rX (the register containing `ref`) as input and output
609 // of a dedicated entrypoint:
610 //
611 // rX <- ReadBarrierMarkRegX(rX)
612 //
613 int32_t entry_point_offset =
614 Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
615 // This runtime call does not require a stack map.
616 x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
617
618 // If the new reference is different from the old reference,
619 // update the field in the holder (`*field_addr`).
620 //
621 // Note that this field could also hold a different object, if
622 // another thread had concurrently changed it. In that case, the
623 // LOCK CMPXCHGL instruction in the compare-and-set (CAS)
624 // operation below would abort the CAS, leaving the field as-is.
625 NearLabel done;
626 __ cmpl(temp1_, ref_cpu_reg);
627 __ j(kEqual, &done);
628
    // Update the holder's field atomically. This may fail if the
    // mutator updates the field before us, but that is OK. This is
    // achieved using a strong compare-and-set (CAS) operation with
    // relaxed memory synchronization ordering, where the expected
    // value is the old reference and the desired value is the new
    // reference. This operation is implemented with a 32-bit LOCK
    // CMPXCHG instruction, which requires the expected value (the
    // old reference) to be in EAX. Save RAX beforehand, and move the
    // expected value (stored in `temp1_`) into EAX.
638 __ movq(temp2_, CpuRegister(RAX));
639 __ movl(CpuRegister(RAX), temp1_);
640
641 // Convenience aliases.
642 CpuRegister base = obj_;
643 CpuRegister expected = CpuRegister(RAX);
644 CpuRegister value = ref_cpu_reg;
645
646 bool base_equals_value = (base.AsRegister() == value.AsRegister());
647 Register value_reg = ref_reg;
648 if (kPoisonHeapReferences) {
649 if (base_equals_value) {
650 // If `base` and `value` are the same register location, move
651 // `value_reg` to a temporary register. This way, poisoning
652 // `value_reg` won't invalidate `base`.
653 value_reg = temp1_.AsRegister();
654 __ movl(CpuRegister(value_reg), base);
655 }
656
657 // Check that the register allocator did not assign the location
658 // of `expected` (RAX) to `value` nor to `base`, so that heap
659 // poisoning (when enabled) works as intended below.
660 // - If `value` were equal to `expected`, both references would
661 // be poisoned twice, meaning they would not be poisoned at
662 // all, as heap poisoning uses address negation.
663 // - If `base` were equal to `expected`, poisoning `expected`
664 // would invalidate `base`.
665 DCHECK_NE(value_reg, expected.AsRegister());
666 DCHECK_NE(base.AsRegister(), expected.AsRegister());
667
668 __ PoisonHeapReference(expected);
669 __ PoisonHeapReference(CpuRegister(value_reg));
670 }
671
672 __ LockCmpxchgl(field_addr_, CpuRegister(value_reg));
673
674 // If heap poisoning is enabled, we need to unpoison the values
675 // that were poisoned earlier.
676 if (kPoisonHeapReferences) {
677 if (base_equals_value) {
678 // `value_reg` has been moved to a temporary register, no need
679 // to unpoison it.
680 } else {
681 __ UnpoisonHeapReference(CpuRegister(value_reg));
682 }
      // No need to unpoison `expected` (RAX), as it will be overwritten below.
684 }
685
686 // Restore RAX.
687 __ movq(CpuRegister(RAX), temp2_);
688
689 __ Bind(&done);
690 __ jmp(GetExitLabel());
691 }
692
693 private:
694 // The location (register) of the marked object reference.
695 const Location ref_;
696 // The register containing the object holding the marked object reference field.
697 const CpuRegister obj_;
698 // The address of the marked reference field. The base of this address must be `obj_`.
699 const Address field_addr_;
700
701 // Should the reference in `ref_` be unpoisoned prior to marking it?
702 const bool unpoison_ref_before_marking_;
703
704 const CpuRegister temp1_;
705 const CpuRegister temp2_;
706
707 DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86_64);
708 };
709
710 // Slow path generating a read barrier for a heap reference.
711 class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
712 public:
  ReadBarrierForHeapReferenceSlowPathX86_64(HInstruction* instruction,
714 Location out,
715 Location ref,
716 Location obj,
717 uint32_t offset,
718 Location index)
719 : SlowPathCode(instruction),
720 out_(out),
721 ref_(ref),
722 obj_(obj),
723 offset_(offset),
724 index_(index) {
725 DCHECK(kEmitCompilerReadBarrier);
726 // If `obj` is equal to `out` or `ref`, it means the initial
727 // object has been overwritten by (or after) the heap object
728 // reference load to be instrumented, e.g.:
729 //
730 // __ movl(out, Address(out, offset));
731 // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
732 //
733 // In that case, we have lost the information about the original
734 // object, and the emitted read barrier cannot work properly.
735 DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
736 DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
737 }
738
  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
740 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
741 LocationSummary* locations = instruction_->GetLocations();
742 CpuRegister reg_out = out_.AsRegister<CpuRegister>();
743 DCHECK(locations->CanCall());
744 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_;
745 DCHECK(instruction_->IsInstanceFieldGet() ||
746 instruction_->IsStaticFieldGet() ||
747 instruction_->IsArrayGet() ||
748 instruction_->IsInstanceOf() ||
749 instruction_->IsCheckCast() ||
750 (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
751 << "Unexpected instruction in read barrier for heap reference slow path: "
752 << instruction_->DebugName();
753
754 __ Bind(GetEntryLabel());
755 SaveLiveRegisters(codegen, locations);
756
757 // We may have to change the index's value, but as `index_` is a
758 // constant member (like other "inputs" of this slow path),
759 // introduce a copy of it, `index`.
760 Location index = index_;
761 if (index_.IsValid()) {
762 // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
763 if (instruction_->IsArrayGet()) {
764 // Compute real offset and store it in index_.
765 Register index_reg = index_.AsRegister<CpuRegister>().AsRegister();
766 DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
767 if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
768 // We are about to change the value of `index_reg` (see the
769 // calls to art::x86_64::X86_64Assembler::shll and
770 // art::x86_64::X86_64Assembler::AddImmediate below), but it
771 // has not been saved by the previous call to
772 // art::SlowPathCode::SaveLiveRegisters, as it is a
773 // callee-save register --
774 // art::SlowPathCode::SaveLiveRegisters does not consider
775 // callee-save registers, as it has been designed with the
776 // assumption that callee-save registers are supposed to be
777 // handled by the called function. So, as a callee-save
778 // register, `index_reg` _would_ eventually be saved onto
779 // the stack, but it would be too late: we would have
780 // changed its value earlier. Therefore, we manually save
781 // it here into another freely available register,
782 // `free_reg`, chosen of course among the caller-save
783 // registers (as a callee-save `free_reg` register would
784 // exhibit the same problem).
785 //
786 // Note we could have requested a temporary register from
787 // the register allocator instead; but we prefer not to, as
788 // this is a slow path, and we know we can find a
789 // caller-save register that is available.
790 Register free_reg = FindAvailableCallerSaveRegister(codegen).AsRegister();
791 __ movl(CpuRegister(free_reg), CpuRegister(index_reg));
792 index_reg = free_reg;
793 index = Location::RegisterLocation(index_reg);
794 } else {
795 // The initial register stored in `index_` has already been
796 // saved in the call to art::SlowPathCode::SaveLiveRegisters
797 // (as it is not a callee-save register), so we can freely
798 // use it.
799 }
800 // Shifting the index value contained in `index_reg` by the
801 // scale factor (2) cannot overflow in practice, as the
802 // runtime is unable to allocate object arrays with a size
803 // larger than 2^26 - 1 (that is, 2^28 - 4 bytes).
804 __ shll(CpuRegister(index_reg), Immediate(TIMES_4));
805 static_assert(
806 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
807 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
808 __ AddImmediate(CpuRegister(index_reg), Immediate(offset_));
809 } else {
810 // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
811 // intrinsics, `index_` is not shifted by a scale factor of 2
812 // (as in the case of ArrayGet), as it is actually an offset
813 // to an object field within an object.
814 DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
815 DCHECK(instruction_->GetLocations()->Intrinsified());
816 DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
817 (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
818 << instruction_->AsInvoke()->GetIntrinsic();
819 DCHECK_EQ(offset_, 0U);
820 DCHECK(index_.IsRegister());
821 }
822 }
823
824 // We're moving two or three locations to locations that could
825 // overlap, so we need a parallel move resolver.
826 InvokeRuntimeCallingConvention calling_convention;
827 HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
828 parallel_move.AddMove(ref_,
829 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
830 DataType::Type::kReference,
831 nullptr);
832 parallel_move.AddMove(obj_,
833 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
834 DataType::Type::kReference,
835 nullptr);
836 if (index.IsValid()) {
837 parallel_move.AddMove(index,
838 Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
839 DataType::Type::kInt32,
840 nullptr);
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
    } else {
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
844 __ movl(CpuRegister(calling_convention.GetRegisterAt(2)), Immediate(offset_));
845 }
846 x86_64_codegen->InvokeRuntime(kQuickReadBarrierSlow,
847 instruction_,
848 instruction_->GetDexPc(),
849 this);
850 CheckEntrypointTypes<
851 kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
852 x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
853
854 RestoreLiveRegisters(codegen, locations);
855 __ jmp(GetExitLabel());
856 }
857
  const char* GetDescription() const OVERRIDE {
859 return "ReadBarrierForHeapReferenceSlowPathX86_64";
860 }
861
862 private:
  CpuRegister FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
864 size_t ref = static_cast<int>(ref_.AsRegister<CpuRegister>().AsRegister());
865 size_t obj = static_cast<int>(obj_.AsRegister<CpuRegister>().AsRegister());
866 for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
867 if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
868 return static_cast<CpuRegister>(i);
869 }
870 }
871 // We shall never fail to find a free caller-save register, as
872 // there are more than two core caller-save registers on x86-64
873 // (meaning it is possible to find one which is different from
874 // `ref` and `obj`).
875 DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
876 LOG(FATAL) << "Could not find a free caller-save register";
877 UNREACHABLE();
878 }
879
880 const Location out_;
881 const Location ref_;
882 const Location obj_;
883 const uint32_t offset_;
884 // An additional location containing an index to an array.
885 // Only used for HArrayGet and the UnsafeGetObject &
886 // UnsafeGetObjectVolatile intrinsics.
887 const Location index_;
888
889 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86_64);
890 };
891
892 // Slow path generating a read barrier for a GC root.
893 class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode {
894 public:
  ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root)
896 : SlowPathCode(instruction), out_(out), root_(root) {
897 DCHECK(kEmitCompilerReadBarrier);
898 }
899
  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
901 LocationSummary* locations = instruction_->GetLocations();
902 DCHECK(locations->CanCall());
903 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
904 DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
905 << "Unexpected instruction in read barrier for GC root slow path: "
906 << instruction_->DebugName();
907
908 __ Bind(GetEntryLabel());
909 SaveLiveRegisters(codegen, locations);
910
911 InvokeRuntimeCallingConvention calling_convention;
912 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
913 x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
914 x86_64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
915 instruction_,
916 instruction_->GetDexPc(),
917 this);
918 CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
919 x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
920
921 RestoreLiveRegisters(codegen, locations);
922 __ jmp(GetExitLabel());
923 }
924
  const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathX86_64"; }
926
927 private:
928 const Location out_;
929 const Location root_;
930
931 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86_64);
932 };
933
934 #undef __
935 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
936 #define __ down_cast<X86_64Assembler*>(GetAssembler())-> // NOLINT
937
inline Condition X86_64IntegerCondition(IfCondition cond) {
939 switch (cond) {
940 case kCondEQ: return kEqual;
941 case kCondNE: return kNotEqual;
942 case kCondLT: return kLess;
943 case kCondLE: return kLessEqual;
944 case kCondGT: return kGreater;
945 case kCondGE: return kGreaterEqual;
946 case kCondB: return kBelow;
947 case kCondBE: return kBelowEqual;
948 case kCondA: return kAbove;
949 case kCondAE: return kAboveEqual;
950 }
951 LOG(FATAL) << "Unreachable";
952 UNREACHABLE();
953 }
954
955 // Maps FP condition to x86_64 name.
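// Note: ucomiss/ucomisd set the flags as for an unsigned comparison (and set ZF, PF and CF
// for unordered operands), so "less/greater" map to below/above here and NaN operands must
// be handled separately by the callers.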
inline Condition X86_64FPCondition(IfCondition cond) {
957 switch (cond) {
958 case kCondEQ: return kEqual;
959 case kCondNE: return kNotEqual;
960 case kCondLT: return kBelow;
961 case kCondLE: return kBelowEqual;
962 case kCondGT: return kAbove;
963 case kCondGE: return kAboveEqual;
964 default: break; // should not happen
965 }
966 LOG(FATAL) << "Unreachable";
967 UNREACHABLE();
968 }
969
HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStaticOrDirectDispatch(
971 const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
972 HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) {
973 return desired_dispatch_info;
974 }
975
void CodeGeneratorX86_64::GenerateStaticOrDirectCall(
977 HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
978 // All registers are assumed to be correctly set up.
979
980 Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp.
981 switch (invoke->GetMethodLoadKind()) {
982 case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
983 // temp = thread->string_init_entrypoint
984 uint32_t offset =
985 GetThreadOffset<kX86_64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
986 __ gs()->movq(temp.AsRegister<CpuRegister>(), Address::Absolute(offset, /* no_rip */ true));
987 break;
988 }
989 case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
990 callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
991 break;
992 case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative:
993 DCHECK(GetCompilerOptions().IsBootImage());
994 __ leal(temp.AsRegister<CpuRegister>(),
995 Address::Absolute(kDummy32BitOffset, /* no_rip */ false));
996 RecordBootImageMethodPatch(invoke);
997 break;
998 case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
999 Load64BitValue(temp.AsRegister<CpuRegister>(), invoke->GetMethodAddress());
1000 break;
1001 case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
1002 __ movq(temp.AsRegister<CpuRegister>(),
1003 Address::Absolute(kDummy32BitOffset, /* no_rip */ false));
1004 RecordMethodBssEntryPatch(invoke);
1005 break;
1006 }
1007 case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: {
1008 GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
1009 return; // No code pointer retrieval; the runtime performs the call directly.
1010 }
1011 }
1012
1013 switch (invoke->GetCodePtrLocation()) {
1014 case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
1015 __ call(&frame_entry_label_);
1016 break;
1017 case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
1018 // (callee_method + offset_of_quick_compiled_code)()
1019 __ call(Address(callee_method.AsRegister<CpuRegister>(),
1020 ArtMethod::EntryPointFromQuickCompiledCodeOffset(
1021 kX86_64PointerSize).SizeValue()));
1022 break;
1023 }
1024 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
1025
1026 DCHECK(!IsLeafMethod());
1027 }
1028
void CodeGeneratorX86_64::GenerateVirtualCall(
1030 HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
1031 CpuRegister temp = temp_in.AsRegister<CpuRegister>();
1032 size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
1033 invoke->GetVTableIndex(), kX86_64PointerSize).SizeValue();
1034
1035 // Use the calling convention instead of the location of the receiver, as
1036 // intrinsics may have put the receiver in a different register. In the intrinsics
1037 // slow path, the arguments have been moved to the right place, so here we are
1038 // guaranteed that the receiver is the first register of the calling convention.
1039 InvokeDexCallingConvention calling_convention;
1040 Register receiver = calling_convention.GetRegisterAt(0);
1041
1042 size_t class_offset = mirror::Object::ClassOffset().SizeValue();
1043 // /* HeapReference<Class> */ temp = receiver->klass_
1044 __ movl(temp, Address(CpuRegister(receiver), class_offset));
1045 MaybeRecordImplicitNullCheck(invoke);
1046 // Instead of simply (possibly) unpoisoning `temp` here, we should
1047 // emit a read barrier for the previous class reference load.
1048 // However this is not required in practice, as this is an
1049 // intermediate/temporary reference and because the current
1050 // concurrent copying collector keeps the from-space memory
  // intact/accessible until the end of the marking phase (the
  // concurrent copying collector may not keep it accessible in the future).
1053 __ MaybeUnpoisonHeapReference(temp);
1054 // temp = temp->GetMethodAt(method_offset);
1055 __ movq(temp, Address(temp, method_offset));
1056 // call temp->GetEntryPoint();
1057 __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(
1058 kX86_64PointerSize).SizeValue()));
1059 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
1060 }
1061
void CodeGeneratorX86_64::RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke) {
1063 boot_image_method_patches_.emplace_back(
1064 invoke->GetTargetMethod().dex_file, invoke->GetTargetMethod().index);
1065 __ Bind(&boot_image_method_patches_.back().label);
1066 }
1067
void CodeGeneratorX86_64::RecordMethodBssEntryPatch(HInvokeStaticOrDirect* invoke) {
1069 method_bss_entry_patches_.emplace_back(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex());
1070 __ Bind(&method_bss_entry_patches_.back().label);
1071 }
1072
void CodeGeneratorX86_64::RecordBootImageTypePatch(HLoadClass* load_class) {
1074 boot_image_type_patches_.emplace_back(
1075 &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
1076 __ Bind(&boot_image_type_patches_.back().label);
1077 }
1078
Label* CodeGeneratorX86_64::NewTypeBssEntryPatch(HLoadClass* load_class) {
1080 type_bss_entry_patches_.emplace_back(
1081 &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
1082 return &type_bss_entry_patches_.back().label;
1083 }
1084
void CodeGeneratorX86_64::RecordBootImageStringPatch(HLoadString* load_string) {
1086 boot_image_string_patches_.emplace_back(
1087 &load_string->GetDexFile(), load_string->GetStringIndex().index_);
1088 __ Bind(&boot_image_string_patches_.back().label);
1089 }
1090
Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) {
1092 DCHECK(!GetCompilerOptions().IsBootImage());
1093 string_bss_entry_patches_.emplace_back(
1094 &load_string->GetDexFile(), load_string->GetStringIndex().index_);
1095 return &string_bss_entry_patches_.back().label;
1096 }
1097
1098 // The label points to the end of the "movl" or another instruction but the literal offset
1099 // for method patch needs to point to the embedded constant which occupies the last 4 bytes.
1100 constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
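// For example, a RIP-relative "movl symbol(%rip), %reg" binds its label at the end of the
// instruction, while the 32-bit displacement to be patched occupies the instruction's last
// 4 bytes, hence the adjustment of 4 above.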
1101
1102 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
inline void CodeGeneratorX86_64::EmitPcRelativeLinkerPatches(
1104 const ArenaDeque<PatchInfo<Label>>& infos,
1105 ArenaVector<linker::LinkerPatch>* linker_patches) {
1106 for (const PatchInfo<Label>& info : infos) {
1107 uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
1108 linker_patches->push_back(
1109 Factory(literal_offset, info.target_dex_file, info.label.Position(), info.offset_or_index));
1110 }
1111 }
1112
void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
1114 DCHECK(linker_patches->empty());
1115 size_t size =
1116 boot_image_method_patches_.size() +
1117 method_bss_entry_patches_.size() +
1118 boot_image_type_patches_.size() +
1119 type_bss_entry_patches_.size() +
1120 boot_image_string_patches_.size() +
1121 string_bss_entry_patches_.size();
1122 linker_patches->reserve(size);
1123 if (GetCompilerOptions().IsBootImage()) {
1124 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
1125 boot_image_method_patches_, linker_patches);
1126 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
1127 boot_image_type_patches_, linker_patches);
1128 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
1129 boot_image_string_patches_, linker_patches);
1130 } else {
1131 DCHECK(boot_image_method_patches_.empty());
1132 EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>(
1133 boot_image_type_patches_, linker_patches);
1134 EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>(
1135 boot_image_string_patches_, linker_patches);
1136 }
1137 EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
1138 method_bss_entry_patches_, linker_patches);
1139 EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
1140 type_bss_entry_patches_, linker_patches);
1141 EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
1142 string_bss_entry_patches_, linker_patches);
1143 DCHECK_EQ(size, linker_patches->size());
1144 }
1145
void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const {
1147 stream << Register(reg);
1148 }
1149
void CodeGeneratorX86_64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
1151 stream << FloatRegister(reg);
1152 }
1153
size_t CodeGeneratorX86_64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
1155 __ movq(Address(CpuRegister(RSP), stack_index), CpuRegister(reg_id));
1156 return kX86_64WordSize;
1157 }
1158
size_t CodeGeneratorX86_64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
1160 __ movq(CpuRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1161 return kX86_64WordSize;
1162 }
1163
size_t CodeGeneratorX86_64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
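  // With SIMD code the full 128-bit XMM register may be live, so spill it with movups;
  // otherwise at most the low 64 bits (a double) need to be preserved.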
1165 if (GetGraph()->HasSIMD()) {
1166 __ movups(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
1167 } else {
1168 __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
1169 }
1170 return GetFloatingPointSpillSlotSize();
1171 }
1172
size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1174 if (GetGraph()->HasSIMD()) {
1175 __ movups(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1176 } else {
1177 __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1178 }
1179 return GetFloatingPointSpillSlotSize();
1180 }
1181
void CodeGeneratorX86_64::InvokeRuntime(QuickEntrypointEnum entrypoint,
1183 HInstruction* instruction,
1184 uint32_t dex_pc,
1185 SlowPathCode* slow_path) {
1186 ValidateInvokeRuntime(entrypoint, instruction, slow_path);
1187 GenerateInvokeRuntime(GetThreadOffset<kX86_64PointerSize>(entrypoint).Int32Value());
1188 if (EntrypointRequiresStackMap(entrypoint)) {
1189 RecordPcInfo(instruction, dex_pc, slow_path);
1190 }
1191 }
1192
void CodeGeneratorX86_64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
1194 HInstruction* instruction,
1195 SlowPathCode* slow_path) {
1196 ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
1197 GenerateInvokeRuntime(entry_point_offset);
1198 }
1199
void CodeGeneratorX86_64::GenerateInvokeRuntime(int32_t entry_point_offset) {
1201 __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip */ true));
1202 }
1203
1204 static constexpr int kNumberOfCpuRegisterPairs = 0;
1205 // Use a fake return address register to mimic Quick.
1206 static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1);
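// Including the fake return address register in the core spill mask lets the frame-size
// bookkeeping treat the return address slot (pushed by the call instruction) like any other
// callee-saved core register.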
CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
1208 const X86_64InstructionSetFeatures& isa_features,
1209 const CompilerOptions& compiler_options,
1210 OptimizingCompilerStats* stats)
1211 : CodeGenerator(graph,
1212 kNumberOfCpuRegisters,
1213 kNumberOfFloatRegisters,
1214 kNumberOfCpuRegisterPairs,
1215 ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
1216 arraysize(kCoreCalleeSaves))
1217 | (1 << kFakeReturnRegister),
1218 ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves),
1219 arraysize(kFpuCalleeSaves)),
1220 compiler_options,
1221 stats),
1222 block_labels_(nullptr),
1223 location_builder_(graph, this),
1224 instruction_visitor_(graph, this),
1225 move_resolver_(graph->GetAllocator(), this),
1226 assembler_(graph->GetAllocator()),
1227 isa_features_(isa_features),
1228 constant_area_start_(0),
1229 boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1230 method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1231 boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1232 type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1233 boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1234 string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1235 jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1236 jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1237 fixups_to_jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
1238 AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
1239 }
1240
InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph,
1242 CodeGeneratorX86_64* codegen)
1243 : InstructionCodeGenerator(graph, codegen),
1244 assembler_(codegen->GetAssembler()),
1245 codegen_(codegen) {}
1246
void CodeGeneratorX86_64::SetupBlockedRegisters() const {
1248 // Stack register is always reserved.
1249 blocked_core_registers_[RSP] = true;
1250
1251 // Block the register used as TMP.
1252 blocked_core_registers_[TMP] = true;
1253 }
1254
static dwarf::Reg DWARFReg(Register reg) {
1256 return dwarf::Reg::X86_64Core(static_cast<int>(reg));
1257 }
1258
static dwarf::Reg DWARFReg(FloatRegister reg) {
1260 return dwarf::Reg::X86_64Fp(static_cast<int>(reg));
1261 }
1262
void CodeGeneratorX86_64::GenerateFrameEntry() {
1264 __ cfi().SetCurrentCFAOffset(kX86_64WordSize); // return address
1265 __ Bind(&frame_entry_label_);
1266 bool skip_overflow_check = IsLeafMethod()
1267 && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
1268 DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
1269
1270 if (GetCompilerOptions().CountHotnessInCompiledCode()) {
1271 __ addw(Address(CpuRegister(kMethodRegisterArgument),
1272 ArtMethod::HotnessCountOffset().Int32Value()),
1273 Immediate(1));
1274 }
1275
1276 if (!skip_overflow_check) {
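    // Implicit stack-overflow check: probe an address `reserved_bytes` below RSP. If the
    // stack is exhausted the load faults, and the fault handler uses the PC info recorded
    // below to throw a StackOverflowError at this location.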
1277 size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86_64);
1278 __ testq(CpuRegister(RAX), Address(CpuRegister(RSP), -static_cast<int32_t>(reserved_bytes)));
1279 RecordPcInfo(nullptr, 0);
1280 }
1281
1282 if (HasEmptyFrame()) {
1283 return;
1284 }
1285
1286 for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
1287 Register reg = kCoreCalleeSaves[i];
1288 if (allocated_registers_.ContainsCoreRegister(reg)) {
1289 __ pushq(CpuRegister(reg));
1290 __ cfi().AdjustCFAOffset(kX86_64WordSize);
1291 __ cfi().RelOffset(DWARFReg(reg), 0);
1292 }
1293 }
1294
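// The callee-save pushes above already lowered RSP, so only the remainder of the
// frame (FP spill area, spill slots, outgoing arguments) needs an explicit adjustment.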
1295 int adjust = GetFrameSize() - GetCoreSpillSize();
1296 __ subq(CpuRegister(RSP), Immediate(adjust));
1297 __ cfi().AdjustCFAOffset(adjust);
1298 uint32_t xmm_spill_location = GetFpuSpillStart();
1299 size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize();
1300
1301 for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
1302 if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
1303 int offset = xmm_spill_location + (xmm_spill_slot_size * i);
1304 __ movsd(Address(CpuRegister(RSP), offset), XmmRegister(kFpuCalleeSaves[i]));
1305 __ cfi().RelOffset(DWARFReg(kFpuCalleeSaves[i]), offset);
1306 }
1307 }
1308
1309 // Save the current method if we need it. Note that we do not
1310 // do this in HCurrentMethod, as the instruction might have been removed
1311 // in the SSA graph.
1312 if (RequiresCurrentMethod()) {
1313 __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset),
1314 CpuRegister(kMethodRegisterArgument));
1315 }
1316
1317 if (GetGraph()->HasShouldDeoptimizeFlag()) {
1318 // Initialize should_deoptimize flag to 0.
1319 __ movl(Address(CpuRegister(RSP), GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
1320 }
1321 }
1322
1323 void CodeGeneratorX86_64::GenerateFrameExit() {
1324 __ cfi().RememberState();
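// RememberState/RestoreState bracket the epilogue so that code emitted after the
// `ret` (e.g. slow paths) is still described by the CFI of the fully set-up frame.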
1325 if (!HasEmptyFrame()) {
1326 uint32_t xmm_spill_location = GetFpuSpillStart();
1327 size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize();
1328 for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
1329 if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
1330 int offset = xmm_spill_location + (xmm_spill_slot_size * i);
1331 __ movsd(XmmRegister(kFpuCalleeSaves[i]), Address(CpuRegister(RSP), offset));
1332 __ cfi().Restore(DWARFReg(kFpuCalleeSaves[i]));
1333 }
1334 }
1335
1336 int adjust = GetFrameSize() - GetCoreSpillSize();
1337 __ addq(CpuRegister(RSP), Immediate(adjust));
1338 __ cfi().AdjustCFAOffset(-adjust);
1339
1340 for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
1341 Register reg = kCoreCalleeSaves[i];
1342 if (allocated_registers_.ContainsCoreRegister(reg)) {
1343 __ popq(CpuRegister(reg));
1344 __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize));
1345 __ cfi().Restore(DWARFReg(reg));
1346 }
1347 }
1348 }
1349 __ ret();
1350 __ cfi().RestoreState();
1351 __ cfi().DefCFAOffset(GetFrameSize());
1352 }
1353
1354 void CodeGeneratorX86_64::Bind(HBasicBlock* block) {
1355 __ Bind(GetLabelOf(block));
1356 }
1357
1358 void CodeGeneratorX86_64::Move(Location destination, Location source) {
1359 if (source.Equals(destination)) {
1360 return;
1361 }
1362 if (destination.IsRegister()) {
1363 CpuRegister dest = destination.AsRegister<CpuRegister>();
1364 if (source.IsRegister()) {
1365 __ movq(dest, source.AsRegister<CpuRegister>());
1366 } else if (source.IsFpuRegister()) {
1367 __ movd(dest, source.AsFpuRegister<XmmRegister>());
1368 } else if (source.IsStackSlot()) {
1369 __ movl(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1370 } else if (source.IsConstant()) {
1371 HConstant* constant = source.GetConstant();
1372 if (constant->IsLongConstant()) {
1373 Load64BitValue(dest, constant->AsLongConstant()->GetValue());
1374 } else {
1375 Load32BitValue(dest, GetInt32ValueOf(constant));
1376 }
1377 } else {
1378 DCHECK(source.IsDoubleStackSlot());
1379 __ movq(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1380 }
1381 } else if (destination.IsFpuRegister()) {
1382 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
1383 if (source.IsRegister()) {
1384 __ movd(dest, source.AsRegister<CpuRegister>());
1385 } else if (source.IsFpuRegister()) {
1386 __ movaps(dest, source.AsFpuRegister<XmmRegister>());
1387 } else if (source.IsConstant()) {
1388 HConstant* constant = source.GetConstant();
1389 int64_t value = CodeGenerator::GetInt64ValueOf(constant);
1390 if (constant->IsFloatConstant()) {
1391 Load32BitValue(dest, static_cast<int32_t>(value));
1392 } else {
1393 Load64BitValue(dest, value);
1394 }
1395 } else if (source.IsStackSlot()) {
1396 __ movss(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1397 } else {
1398 DCHECK(source.IsDoubleStackSlot());
1399 __ movsd(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1400 }
1401 } else if (destination.IsStackSlot()) {
1402 if (source.IsRegister()) {
1403 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
1404 source.AsRegister<CpuRegister>());
1405 } else if (source.IsFpuRegister()) {
1406 __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
1407 source.AsFpuRegister<XmmRegister>());
1408 } else if (source.IsConstant()) {
1409 HConstant* constant = source.GetConstant();
1410 int32_t value = GetInt32ValueOf(constant);
1411 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
1412 } else {
1413 DCHECK(source.IsStackSlot()) << source;
1414 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
1415 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
1416 }
1417 } else {
1418 DCHECK(destination.IsDoubleStackSlot());
1419 if (source.IsRegister()) {
1420 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
1421 source.AsRegister<CpuRegister>());
1422 } else if (source.IsFpuRegister()) {
1423 __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
1424 source.AsFpuRegister<XmmRegister>());
1425 } else if (source.IsConstant()) {
1426 HConstant* constant = source.GetConstant();
1427 DCHECK(constant->IsLongConstant() || constant->IsDoubleConstant());
1428 int64_t value = GetInt64ValueOf(constant);
1429 Store64BitValueToStack(destination, value);
1430 } else {
1431 DCHECK(source.IsDoubleStackSlot());
1432 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
1433 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
1434 }
1435 }
1436 }
1437
1438 void CodeGeneratorX86_64::MoveConstant(Location location, int32_t value) {
1439 DCHECK(location.IsRegister());
1440 Load64BitValue(location.AsRegister<CpuRegister>(), static_cast<int64_t>(value));
1441 }
1442
1443 void CodeGeneratorX86_64::MoveLocation(
1444 Location dst, Location src, DataType::Type dst_type ATTRIBUTE_UNUSED) {
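// The destination type is not needed on x86-64: Move() derives the operand width
// from the location kinds (stack slot vs. double stack slot, 32- vs. 64-bit constant).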
1445 Move(dst, src);
1446 }
1447
1448 void CodeGeneratorX86_64::AddLocationAsTemp(Location location, LocationSummary* locations) {
1449 if (location.IsRegister()) {
1450 locations->AddTemp(location);
1451 } else {
1452 UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
1453 }
1454 }
1455
1456 void InstructionCodeGeneratorX86_64::HandleGoto(HInstruction* got, HBasicBlock* successor) {
1457 if (successor->IsExitBlock()) {
1458 DCHECK(got->GetPrevious()->AlwaysThrows());
1459 return; // no code needed
1460 }
1461
1462 HBasicBlock* block = got->GetBlock();
1463 HInstruction* previous = got->GetPrevious();
1464
1465 HLoopInformation* info = block->GetLoopInformation();
1466 if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
1467 if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) {
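// Reload the current ArtMethod* from its stack slot (offset 0) and bump its
// hotness counter on every back edge that carries a suspend check.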
1468 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), 0));
1469 __ addw(Address(CpuRegister(TMP), ArtMethod::HotnessCountOffset().Int32Value()),
1470 Immediate(1));
1471 }
1472 GenerateSuspendCheck(info->GetSuspendCheck(), successor);
1473 return;
1474 }
1475
1476 if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
1477 GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
1478 }
1479 if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
1480 __ jmp(codegen_->GetLabelOf(successor));
1481 }
1482 }
1483
1484 void LocationsBuilderX86_64::VisitGoto(HGoto* got) {
1485 got->SetLocations(nullptr);
1486 }
1487
1488 void InstructionCodeGeneratorX86_64::VisitGoto(HGoto* got) {
1489 HandleGoto(got, got->GetSuccessor());
1490 }
1491
1492 void LocationsBuilderX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
1493 try_boundary->SetLocations(nullptr);
1494 }
1495
1496 void InstructionCodeGeneratorX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
1497 HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
1498 if (!successor->IsExitBlock()) {
1499 HandleGoto(try_boundary, successor);
1500 }
1501 }
1502
1503 void LocationsBuilderX86_64::VisitExit(HExit* exit) {
1504 exit->SetLocations(nullptr);
1505 }
1506
1507 void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
1508 }
1509
1510 template<class LabelType>
1511 void InstructionCodeGeneratorX86_64::GenerateFPJumps(HCondition* cond,
1512 LabelType* true_label,
1513 LabelType* false_label) {
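// ucomiss/ucomisd report an unordered (NaN) comparison via the parity flag, so NaN
// is dispatched first, according to the condition's bias, before the ordinary jump.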
1514 if (cond->IsFPConditionTrueIfNaN()) {
1515 __ j(kUnordered, true_label);
1516 } else if (cond->IsFPConditionFalseIfNaN()) {
1517 __ j(kUnordered, false_label);
1518 }
1519 __ j(X86_64FPCondition(cond->GetCondition()), true_label);
1520 }
1521
1522 void InstructionCodeGeneratorX86_64::GenerateCompareTest(HCondition* condition) {
1523 LocationSummary* locations = condition->GetLocations();
1524
1525 Location left = locations->InAt(0);
1526 Location right = locations->InAt(1);
1527 DataType::Type type = condition->InputAt(0)->GetType();
1528 switch (type) {
1529 case DataType::Type::kBool:
1530 case DataType::Type::kUint8:
1531 case DataType::Type::kInt8:
1532 case DataType::Type::kUint16:
1533 case DataType::Type::kInt16:
1534 case DataType::Type::kInt32:
1535 case DataType::Type::kReference: {
1536 codegen_->GenerateIntCompare(left, right);
1537 break;
1538 }
1539 case DataType::Type::kInt64: {
1540 codegen_->GenerateLongCompare(left, right);
1541 break;
1542 }
1543 case DataType::Type::kFloat32: {
1544 if (right.IsFpuRegister()) {
1545 __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
1546 } else if (right.IsConstant()) {
1547 __ ucomiss(left.AsFpuRegister<XmmRegister>(),
1548 codegen_->LiteralFloatAddress(
1549 right.GetConstant()->AsFloatConstant()->GetValue()));
1550 } else {
1551 DCHECK(right.IsStackSlot());
1552 __ ucomiss(left.AsFpuRegister<XmmRegister>(),
1553 Address(CpuRegister(RSP), right.GetStackIndex()));
1554 }
1555 break;
1556 }
1557 case DataType::Type::kFloat64: {
1558 if (right.IsFpuRegister()) {
1559 __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
1560 } else if (right.IsConstant()) {
1561 __ ucomisd(left.AsFpuRegister<XmmRegister>(),
1562 codegen_->LiteralDoubleAddress(
1563 right.GetConstant()->AsDoubleConstant()->GetValue()));
1564 } else {
1565 DCHECK(right.IsDoubleStackSlot());
1566 __ ucomisd(left.AsFpuRegister<XmmRegister>(),
1567 Address(CpuRegister(RSP), right.GetStackIndex()));
1568 }
1569 break;
1570 }
1571 default:
1572 LOG(FATAL) << "Unexpected condition type " << type;
1573 }
1574 }
1575
1576 template<class LabelType>
1577 void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* condition,
1578 LabelType* true_target_in,
1579 LabelType* false_target_in) {
1580 // Generated branching requires both targets to be explicit. If either of the
1581 // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead.
1582 LabelType fallthrough_target;
1583 LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
1584 LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
1585
1586 // Generate the comparison to set the CC.
1587 GenerateCompareTest(condition);
1588
1589 // Now generate the correct jump(s).
1590 DataType::Type type = condition->InputAt(0)->GetType();
1591 switch (type) {
1592 case DataType::Type::kInt64: {
1593 __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
1594 break;
1595 }
1596 case DataType::Type::kFloat32: {
1597 GenerateFPJumps(condition, true_target, false_target);
1598 break;
1599 }
1600 case DataType::Type::kFloat64: {
1601 GenerateFPJumps(condition, true_target, false_target);
1602 break;
1603 }
1604 default:
1605 LOG(FATAL) << "Unexpected condition type " << type;
1606 }
1607
1608 if (false_target != &fallthrough_target) {
1609 __ jmp(false_target);
1610 }
1611
1612 if (fallthrough_target.IsLinked()) {
1613 __ Bind(&fallthrough_target);
1614 }
1615 }
1616
1617 static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) {
1618 // Moves may affect the eflags register (move zero uses xorl), so the EFLAGS
1619 // are set only strictly before `branch`. We can't use the eflags on long
1620 // conditions if they are materialized due to the complex branching.
1621 return cond->IsCondition() &&
1622 cond->GetNext() == branch &&
1623 !DataType::IsFloatingPointType(cond->InputAt(0)->GetType());
1624 }
1625
1626 template<class LabelType>
1627 void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruction,
1628 size_t condition_input_index,
1629 LabelType* true_target,
1630 LabelType* false_target) {
1631 HInstruction* cond = instruction->InputAt(condition_input_index);
1632
1633 if (true_target == nullptr && false_target == nullptr) {
1634 // Nothing to do. The code always falls through.
1635 return;
1636 } else if (cond->IsIntConstant()) {
1637 // Constant condition, statically compared against "true" (integer value 1).
1638 if (cond->AsIntConstant()->IsTrue()) {
1639 if (true_target != nullptr) {
1640 __ jmp(true_target);
1641 }
1642 } else {
1643 DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
1644 if (false_target != nullptr) {
1645 __ jmp(false_target);
1646 }
1647 }
1648 return;
1649 }
1650
1651 // The following code generates these patterns:
1652 // (1) true_target == nullptr && false_target != nullptr
1653 // - opposite condition true => branch to false_target
1654 // (2) true_target != nullptr && false_target == nullptr
1655 // - condition true => branch to true_target
1656 // (3) true_target != nullptr && false_target != nullptr
1657 // - condition true => branch to true_target
1658 // - branch to false_target
1659 if (IsBooleanValueOrMaterializedCondition(cond)) {
1660 if (AreEflagsSetFrom(cond, instruction)) {
1661 if (true_target == nullptr) {
1662 __ j(X86_64IntegerCondition(cond->AsCondition()->GetOppositeCondition()), false_target);
1663 } else {
1664 __ j(X86_64IntegerCondition(cond->AsCondition()->GetCondition()), true_target);
1665 }
1666 } else {
1667 // Materialized condition, compare against 0.
1668 Location lhs = instruction->GetLocations()->InAt(condition_input_index);
1669 if (lhs.IsRegister()) {
1670 __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
1671 } else {
1672 __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0));
1673 }
1674 if (true_target == nullptr) {
1675 __ j(kEqual, false_target);
1676 } else {
1677 __ j(kNotEqual, true_target);
1678 }
1679 }
1680 } else {
1681 // Condition has not been materialized, use its inputs as the
1682 // comparison and its condition as the branch condition.
1683 HCondition* condition = cond->AsCondition();
1684
1685 // If this is a long or FP comparison that has been folded into
1686 // the HCondition, generate the comparison directly.
1687 DataType::Type type = condition->InputAt(0)->GetType();
1688 if (type == DataType::Type::kInt64 || DataType::IsFloatingPointType(type)) {
1689 GenerateCompareTestAndBranch(condition, true_target, false_target);
1690 return;
1691 }
1692
1693 Location lhs = condition->GetLocations()->InAt(0);
1694 Location rhs = condition->GetLocations()->InAt(1);
1695 codegen_->GenerateIntCompare(lhs, rhs);
1696 if (true_target == nullptr) {
1697 __ j(X86_64IntegerCondition(condition->GetOppositeCondition()), false_target);
1698 } else {
1699 __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
1700 }
1701 }
1702
1703 // If neither branch falls through (case 3), the conditional branch to `true_target`
1704 // was already emitted (case 2) and we need to emit a jump to `false_target`.
1705 if (true_target != nullptr && false_target != nullptr) {
1706 __ jmp(false_target);
1707 }
1708 }
1709
1710 void LocationsBuilderX86_64::VisitIf(HIf* if_instr) {
1711 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
1712 if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
1713 locations->SetInAt(0, Location::Any());
1714 }
1715 }
1716
1717 void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
1718 HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
1719 HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
1720 Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
1721 nullptr : codegen_->GetLabelOf(true_successor);
1722 Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
1723 nullptr : codegen_->GetLabelOf(false_successor);
1724 GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
1725 }
1726
1727 void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
1728 LocationSummary* locations = new (GetGraph()->GetAllocator())
1729 LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
1730 InvokeRuntimeCallingConvention calling_convention;
1731 RegisterSet caller_saves = RegisterSet::Empty();
1732 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1733 locations->SetCustomSlowPathCallerSaves(caller_saves);
1734 if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
1735 locations->SetInAt(0, Location::Any());
1736 }
1737 }
1738
1739 void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
1740 SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86_64>(deoptimize);
1741 GenerateTestAndBranch<Label>(deoptimize,
1742 /* condition_input_index */ 0,
1743 slow_path->GetEntryLabel(),
1744 /* false_target */ nullptr);
1745 }
1746
1747 void LocationsBuilderX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
1748 LocationSummary* locations = new (GetGraph()->GetAllocator())
1749 LocationSummary(flag, LocationSummary::kNoCall);
1750 locations->SetOut(Location::RequiresRegister());
1751 }
1752
1753 void InstructionCodeGeneratorX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
1754 __ movl(flag->GetLocations()->Out().AsRegister<CpuRegister>(),
1755 Address(CpuRegister(RSP), codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
1756 }
1757
1758 static bool SelectCanUseCMOV(HSelect* select) {
1759 // There are no conditional move instructions for XMMs.
1760 if (DataType::IsFloatingPointType(select->GetType())) {
1761 return false;
1762 }
1763
1764 // A FP condition doesn't generate the single CC that we need.
1765 HInstruction* condition = select->GetCondition();
1766 if (condition->IsCondition() &&
1767 DataType::IsFloatingPointType(condition->InputAt(0)->GetType())) {
1768 return false;
1769 }
1770
1771 // We can generate a CMOV for this Select.
1772 return true;
1773 }
1774
1775 void LocationsBuilderX86_64::VisitSelect(HSelect* select) {
1776 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select);
1777 if (DataType::IsFloatingPointType(select->GetType())) {
1778 locations->SetInAt(0, Location::RequiresFpuRegister());
1779 locations->SetInAt(1, Location::Any());
1780 } else {
1781 locations->SetInAt(0, Location::RequiresRegister());
1782 if (SelectCanUseCMOV(select)) {
1783 if (select->InputAt(1)->IsConstant()) {
1784 locations->SetInAt(1, Location::RequiresRegister());
1785 } else {
1786 locations->SetInAt(1, Location::Any());
1787 }
1788 } else {
1789 locations->SetInAt(1, Location::Any());
1790 }
1791 }
1792 if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
1793 locations->SetInAt(2, Location::RequiresRegister());
1794 }
1795 locations->SetOut(Location::SameAsFirstInput());
1796 }
1797
1798 void InstructionCodeGeneratorX86_64::VisitSelect(HSelect* select) {
1799 LocationSummary* locations = select->GetLocations();
1800 if (SelectCanUseCMOV(select)) {
1801 // If both the condition and the source types are integer, we can generate
1802 // a CMOV to implement Select.
1803 CpuRegister value_false = locations->InAt(0).AsRegister<CpuRegister>();
1804 Location value_true_loc = locations->InAt(1);
1805 DCHECK(locations->InAt(0).Equals(locations->Out()));
1806
1807 HInstruction* select_condition = select->GetCondition();
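// Default: the condition value is tested against zero with `testl`, so kNotEqual
// (i.e. non-zero) selects the true input unless a materialized HCondition below
// supplies the exact condition code instead.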
1808 Condition cond = kNotEqual;
1809
1810 // Figure out how to test the 'condition'.
1811 if (select_condition->IsCondition()) {
1812 HCondition* condition = select_condition->AsCondition();
1813 if (!condition->IsEmittedAtUseSite()) {
1814 // This was a previously materialized condition.
1815 // Can we use the existing condition code?
1816 if (AreEflagsSetFrom(condition, select)) {
1817 // Materialization was the previous instruction. Condition codes are right.
1818 cond = X86_64IntegerCondition(condition->GetCondition());
1819 } else {
1820 // No, we have to recreate the condition code.
1821 CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
1822 __ testl(cond_reg, cond_reg);
1823 }
1824 } else {
1825 GenerateCompareTest(condition);
1826 cond = X86_64IntegerCondition(condition->GetCondition());
1827 }
1828 } else {
1829 // Must be a Boolean condition, which needs to be compared to 0.
1830 CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
1831 __ testl(cond_reg, cond_reg);
1832 }
1833
1834 // If the condition is true, overwrite the output, which already contains false.
1835 // Generate the correct sized CMOV.
1836 bool is_64_bit = DataType::Is64BitType(select->GetType());
1837 if (value_true_loc.IsRegister()) {
1838 __ cmov(cond, value_false, value_true_loc.AsRegister<CpuRegister>(), is_64_bit);
1839 } else {
1840 __ cmov(cond,
1841 value_false,
1842 Address(CpuRegister(RSP), value_true_loc.GetStackIndex()), is_64_bit);
1843 }
1844 } else {
1845 NearLabel false_target;
1846 GenerateTestAndBranch<NearLabel>(select,
1847 /* condition_input_index */ 2,
1848 /* true_target */ nullptr,
1849 &false_target);
1850 codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
1851 __ Bind(&false_target);
1852 }
1853 }
1854
1855 void LocationsBuilderX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
1856 new (GetGraph()->GetAllocator()) LocationSummary(info);
1857 }
1858
1859 void InstructionCodeGeneratorX86_64::VisitNativeDebugInfo(HNativeDebugInfo*) {
1860 // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
1861 }
1862
1863 void CodeGeneratorX86_64::GenerateNop() {
1864 __ nop();
1865 }
1866
1867 void LocationsBuilderX86_64::HandleCondition(HCondition* cond) {
1868 LocationSummary* locations =
1869 new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall);
1870 // Handle the long/FP comparisons made in instruction simplification.
1871 switch (cond->InputAt(0)->GetType()) {
1872 case DataType::Type::kInt64:
1873 locations->SetInAt(0, Location::RequiresRegister());
1874 locations->SetInAt(1, Location::Any());
1875 break;
1876 case DataType::Type::kFloat32:
1877 case DataType::Type::kFloat64:
1878 locations->SetInAt(0, Location::RequiresFpuRegister());
1879 locations->SetInAt(1, Location::Any());
1880 break;
1881 default:
1882 locations->SetInAt(0, Location::RequiresRegister());
1883 locations->SetInAt(1, Location::Any());
1884 break;
1885 }
1886 if (!cond->IsEmittedAtUseSite()) {
1887 locations->SetOut(Location::RequiresRegister());
1888 }
1889 }
1890
1891 void InstructionCodeGeneratorX86_64::HandleCondition(HCondition* cond) {
1892 if (cond->IsEmittedAtUseSite()) {
1893 return;
1894 }
1895
1896 LocationSummary* locations = cond->GetLocations();
1897 Location lhs = locations->InAt(0);
1898 Location rhs = locations->InAt(1);
1899 CpuRegister reg = locations->Out().AsRegister<CpuRegister>();
1900 NearLabel true_label, false_label;
1901
1902 switch (cond->InputAt(0)->GetType()) {
1903 default:
1904 // Integer case.
1905
1906 // Clear output register: setcc only sets the low byte.
1907 __ xorl(reg, reg);
1908
1909 codegen_->GenerateIntCompare(lhs, rhs);
1910 __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
1911 return;
1912 case DataType::Type::kInt64:
1913 // Clear output register: setcc only sets the low byte.
1914 __ xorl(reg, reg);
1915
1916 codegen_->GenerateLongCompare(lhs, rhs);
1917 __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
1918 return;
1919 case DataType::Type::kFloat32: {
1920 XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
1921 if (rhs.IsConstant()) {
1922 float value = rhs.GetConstant()->AsFloatConstant()->GetValue();
1923 __ ucomiss(lhs_reg, codegen_->LiteralFloatAddress(value));
1924 } else if (rhs.IsStackSlot()) {
1925 __ ucomiss(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
1926 } else {
1927 __ ucomiss(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
1928 }
1929 GenerateFPJumps(cond, &true_label, &false_label);
1930 break;
1931 }
1932 case DataType::Type::kFloat64: {
1933 XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
1934 if (rhs.IsConstant()) {
1935 double value = rhs.GetConstant()->AsDoubleConstant()->GetValue();
1936 __ ucomisd(lhs_reg, codegen_->LiteralDoubleAddress(value));
1937 } else if (rhs.IsDoubleStackSlot()) {
1938 __ ucomisd(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
1939 } else {
1940 __ ucomisd(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
1941 }
1942 GenerateFPJumps(cond, &true_label, &false_label);
1943 break;
1944 }
1945 }
1946
1947 // Convert the jumps into the result.
1948 NearLabel done_label;
1949
1950 // False case: result = 0.
1951 __ Bind(&false_label);
1952 __ xorl(reg, reg);
1953 __ jmp(&done_label);
1954
1955 // True case: result = 1.
1956 __ Bind(&true_label);
1957 __ movl(reg, Immediate(1));
1958 __ Bind(&done_label);
1959 }
1960
1961 void LocationsBuilderX86_64::VisitEqual(HEqual* comp) {
1962 HandleCondition(comp);
1963 }
1964
1965 void InstructionCodeGeneratorX86_64::VisitEqual(HEqual* comp) {
1966 HandleCondition(comp);
1967 }
1968
1969 void LocationsBuilderX86_64::VisitNotEqual(HNotEqual* comp) {
1970 HandleCondition(comp);
1971 }
1972
1973 void InstructionCodeGeneratorX86_64::VisitNotEqual(HNotEqual* comp) {
1974 HandleCondition(comp);
1975 }
1976
1977 void LocationsBuilderX86_64::VisitLessThan(HLessThan* comp) {
1978 HandleCondition(comp);
1979 }
1980
1981 void InstructionCodeGeneratorX86_64::VisitLessThan(HLessThan* comp) {
1982 HandleCondition(comp);
1983 }
1984
1985 void LocationsBuilderX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
1986 HandleCondition(comp);
1987 }
1988
1989 void InstructionCodeGeneratorX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
1990 HandleCondition(comp);
1991 }
1992
1993 void LocationsBuilderX86_64::VisitGreaterThan(HGreaterThan* comp) {
1994 HandleCondition(comp);
1995 }
1996
1997 void InstructionCodeGeneratorX86_64::VisitGreaterThan(HGreaterThan* comp) {
1998 HandleCondition(comp);
1999 }
2000
2001 void LocationsBuilderX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2002 HandleCondition(comp);
2003 }
2004
2005 void InstructionCodeGeneratorX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2006 HandleCondition(comp);
2007 }
2008
2009 void LocationsBuilderX86_64::VisitBelow(HBelow* comp) {
2010 HandleCondition(comp);
2011 }
2012
2013 void InstructionCodeGeneratorX86_64::VisitBelow(HBelow* comp) {
2014 HandleCondition(comp);
2015 }
2016
2017 void LocationsBuilderX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
2018 HandleCondition(comp);
2019 }
2020
2021 void InstructionCodeGeneratorX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
2022 HandleCondition(comp);
2023 }
2024
2025 void LocationsBuilderX86_64::VisitAbove(HAbove* comp) {
2026 HandleCondition(comp);
2027 }
2028
2029 void InstructionCodeGeneratorX86_64::VisitAbove(HAbove* comp) {
2030 HandleCondition(comp);
2031 }
2032
2033 void LocationsBuilderX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
2034 HandleCondition(comp);
2035 }
2036
2037 void InstructionCodeGeneratorX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
2038 HandleCondition(comp);
2039 }
2040
2041 void LocationsBuilderX86_64::VisitCompare(HCompare* compare) {
2042 LocationSummary* locations =
2043 new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
2044 switch (compare->InputAt(0)->GetType()) {
2045 case DataType::Type::kBool:
2046 case DataType::Type::kUint8:
2047 case DataType::Type::kInt8:
2048 case DataType::Type::kUint16:
2049 case DataType::Type::kInt16:
2050 case DataType::Type::kInt32:
2051 case DataType::Type::kInt64: {
2052 locations->SetInAt(0, Location::RequiresRegister());
2053 locations->SetInAt(1, Location::Any());
2054 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2055 break;
2056 }
2057 case DataType::Type::kFloat32:
2058 case DataType::Type::kFloat64: {
2059 locations->SetInAt(0, Location::RequiresFpuRegister());
2060 locations->SetInAt(1, Location::Any());
2061 locations->SetOut(Location::RequiresRegister());
2062 break;
2063 }
2064 default:
2065 LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
2066 }
2067 }
2068
2069 void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) {
2070 LocationSummary* locations = compare->GetLocations();
2071 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
2072 Location left = locations->InAt(0);
2073 Location right = locations->InAt(1);
2074
2075 NearLabel less, greater, done;
2076 DataType::Type type = compare->InputAt(0)->GetType();
2077 Condition less_cond = kLess;
2078
2079 switch (type) {
2080 case DataType::Type::kBool:
2081 case DataType::Type::kUint8:
2082 case DataType::Type::kInt8:
2083 case DataType::Type::kUint16:
2084 case DataType::Type::kInt16:
2085 case DataType::Type::kInt32: {
2086 codegen_->GenerateIntCompare(left, right);
2087 break;
2088 }
2089 case DataType::Type::kInt64: {
2090 codegen_->GenerateLongCompare(left, right);
2091 break;
2092 }
2093 case DataType::Type::kFloat32: {
2094 XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
2095 if (right.IsConstant()) {
2096 float value = right.GetConstant()->AsFloatConstant()->GetValue();
2097 __ ucomiss(left_reg, codegen_->LiteralFloatAddress(value));
2098 } else if (right.IsStackSlot()) {
2099 __ ucomiss(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
2100 } else {
2101 __ ucomiss(left_reg, right.AsFpuRegister<XmmRegister>());
2102 }
2103 __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
2104 less_cond = kBelow; // ucomis{s,d} sets CF
2105 break;
2106 }
2107 case DataType::Type::kFloat64: {
2108 XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
2109 if (right.IsConstant()) {
2110 double value = right.GetConstant()->AsDoubleConstant()->GetValue();
2111 __ ucomisd(left_reg, codegen_->LiteralDoubleAddress(value));
2112 } else if (right.IsDoubleStackSlot()) {
2113 __ ucomisd(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
2114 } else {
2115 __ ucomisd(left_reg, right.AsFpuRegister<XmmRegister>());
2116 }
2117 __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
2118 less_cond = kBelow; // ucomis{s,d} sets CF
2119 break;
2120 }
2121 default:
2122 LOG(FATAL) << "Unexpected compare type " << type;
2123 }
2124
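// Materialize the three-way result from the still-live flags (movl does not clobber
// them): equal -> 0, `less_cond` -> -1, anything else -> 1.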
2125 __ movl(out, Immediate(0));
2126 __ j(kEqual, &done);
2127 __ j(less_cond, &less);
2128
2129 __ Bind(&greater);
2130 __ movl(out, Immediate(1));
2131 __ jmp(&done);
2132
2133 __ Bind(&less);
2134 __ movl(out, Immediate(-1));
2135
2136 __ Bind(&done);
2137 }
2138
2139 void LocationsBuilderX86_64::VisitIntConstant(HIntConstant* constant) {
2140 LocationSummary* locations =
2141 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2142 locations->SetOut(Location::ConstantLocation(constant));
2143 }
2144
2145 void InstructionCodeGeneratorX86_64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
2146 // Will be generated at use site.
2147 }
2148
2149 void LocationsBuilderX86_64::VisitNullConstant(HNullConstant* constant) {
2150 LocationSummary* locations =
2151 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2152 locations->SetOut(Location::ConstantLocation(constant));
2153 }
2154
2155 void InstructionCodeGeneratorX86_64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
2156 // Will be generated at use site.
2157 }
2158
2159 void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) {
2160 LocationSummary* locations =
2161 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2162 locations->SetOut(Location::ConstantLocation(constant));
2163 }
2164
2165 void InstructionCodeGeneratorX86_64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
2166 // Will be generated at use site.
2167 }
2168
2169 void LocationsBuilderX86_64::VisitFloatConstant(HFloatConstant* constant) {
2170 LocationSummary* locations =
2171 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2172 locations->SetOut(Location::ConstantLocation(constant));
2173 }
2174
2175 void InstructionCodeGeneratorX86_64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
2176 // Will be generated at use site.
2177 }
2178
2179 void LocationsBuilderX86_64::VisitDoubleConstant(HDoubleConstant* constant) {
2180 LocationSummary* locations =
2181 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2182 locations->SetOut(Location::ConstantLocation(constant));
2183 }
2184
2185 void InstructionCodeGeneratorX86_64::VisitDoubleConstant(
2186 HDoubleConstant* constant ATTRIBUTE_UNUSED) {
2187 // Will be generated at use site.
2188 }
2189
2190 void LocationsBuilderX86_64::VisitConstructorFence(HConstructorFence* constructor_fence) {
2191 constructor_fence->SetLocations(nullptr);
2192 }
2193
2194 void InstructionCodeGeneratorX86_64::VisitConstructorFence(
2195 HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
2196 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2197 }
2198
2199 void LocationsBuilderX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2200 memory_barrier->SetLocations(nullptr);
2201 }
2202
2203 void InstructionCodeGeneratorX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2204 codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
2205 }
2206
2207 void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) {
2208 ret->SetLocations(nullptr);
2209 }
2210
2211 void InstructionCodeGeneratorX86_64::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) {
2212 codegen_->GenerateFrameExit();
2213 }
2214
2215 void LocationsBuilderX86_64::VisitReturn(HReturn* ret) {
2216 LocationSummary* locations =
2217 new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall);
2218 switch (ret->InputAt(0)->GetType()) {
2219 case DataType::Type::kReference:
2220 case DataType::Type::kBool:
2221 case DataType::Type::kUint8:
2222 case DataType::Type::kInt8:
2223 case DataType::Type::kUint16:
2224 case DataType::Type::kInt16:
2225 case DataType::Type::kInt32:
2226 case DataType::Type::kInt64:
2227 locations->SetInAt(0, Location::RegisterLocation(RAX));
2228 break;
2229
2230 case DataType::Type::kFloat32:
2231 case DataType::Type::kFloat64:
2232 locations->SetInAt(0, Location::FpuRegisterLocation(XMM0));
2233 break;
2234
2235 default:
2236 LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
2237 }
2238 }
2239
2240 void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) {
2241 if (kIsDebugBuild) {
2242 switch (ret->InputAt(0)->GetType()) {
2243 case DataType::Type::kReference:
2244 case DataType::Type::kBool:
2245 case DataType::Type::kUint8:
2246 case DataType::Type::kInt8:
2247 case DataType::Type::kUint16:
2248 case DataType::Type::kInt16:
2249 case DataType::Type::kInt32:
2250 case DataType::Type::kInt64:
2251 DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<CpuRegister>().AsRegister(), RAX);
2252 break;
2253
2254 case DataType::Type::kFloat32:
2255 case DataType::Type::kFloat64:
2256 DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(),
2257 XMM0);
2258 break;
2259
2260 default:
2261 LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
2262 }
2263 }
2264 codegen_->GenerateFrameExit();
2265 }
2266
2267 Location InvokeDexCallingConventionVisitorX86_64::GetReturnLocation(DataType::Type type) const {
2268 switch (type) {
2269 case DataType::Type::kReference:
2270 case DataType::Type::kBool:
2271 case DataType::Type::kUint8:
2272 case DataType::Type::kInt8:
2273 case DataType::Type::kUint16:
2274 case DataType::Type::kInt16:
2275 case DataType::Type::kUint32:
2276 case DataType::Type::kInt32:
2277 case DataType::Type::kUint64:
2278 case DataType::Type::kInt64:
2279 return Location::RegisterLocation(RAX);
2280
2281 case DataType::Type::kVoid:
2282 return Location::NoLocation();
2283
2284 case DataType::Type::kFloat64:
2285 case DataType::Type::kFloat32:
2286 return Location::FpuRegisterLocation(XMM0);
2287 }
2288
2289 UNREACHABLE();
2290 }
2291
2292 Location InvokeDexCallingConventionVisitorX86_64::GetMethodLocation() const {
2293 return Location::RegisterLocation(kMethodRegisterArgument);
2294 }
2295
2296 Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(DataType::Type type) {
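// Note: stack_index_ counts dex argument slots (wide types take two) and advances even
// for register-passed arguments; its value is only turned into a stack offset once the
// registers of the calling convention are exhausted.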
2297 switch (type) {
2298 case DataType::Type::kReference:
2299 case DataType::Type::kBool:
2300 case DataType::Type::kUint8:
2301 case DataType::Type::kInt8:
2302 case DataType::Type::kUint16:
2303 case DataType::Type::kInt16:
2304 case DataType::Type::kInt32: {
2305 uint32_t index = gp_index_++;
2306 stack_index_++;
2307 if (index < calling_convention.GetNumberOfRegisters()) {
2308 return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
2309 } else {
2310 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
2311 }
2312 }
2313
2314 case DataType::Type::kInt64: {
2315 uint32_t index = gp_index_;
2316 stack_index_ += 2;
2317 if (index < calling_convention.GetNumberOfRegisters()) {
2318 gp_index_ += 1;
2319 return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
2320 } else {
2321 gp_index_ += 2;
2322 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
2323 }
2324 }
2325
2326 case DataType::Type::kFloat32: {
2327 uint32_t index = float_index_++;
2328 stack_index_++;
2329 if (index < calling_convention.GetNumberOfFpuRegisters()) {
2330 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
2331 } else {
2332 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
2333 }
2334 }
2335
2336 case DataType::Type::kFloat64: {
2337 uint32_t index = float_index_++;
2338 stack_index_ += 2;
2339 if (index < calling_convention.GetNumberOfFpuRegisters()) {
2340 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
2341 } else {
2342 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
2343 }
2344 }
2345
2346 case DataType::Type::kUint32:
2347 case DataType::Type::kUint64:
2348 case DataType::Type::kVoid:
2349 LOG(FATAL) << "Unexpected parameter type " << type;
2350 break;
2351 }
2352 return Location::NoLocation();
2353 }
2354
2355 void LocationsBuilderX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2356 // The trampoline uses the same calling convention as the dex calling convention,
2357 // except that instead of loading arg0/r0 with the target Method*, arg0/r0 will
2358 // contain the method_idx.
2359 HandleInvoke(invoke);
2360 }
2361
2362 void InstructionCodeGeneratorX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2363 codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
2364 }
2365
2366 void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2367 // Explicit clinit checks triggered by static invokes must have been pruned by
2368 // art::PrepareForRegisterAllocation.
2369 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2370
2371 IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
2372 if (intrinsic.TryDispatch(invoke)) {
2373 return;
2374 }
2375
2376 HandleInvoke(invoke);
2377 }
2378
2379 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
2380 if (invoke->GetLocations()->Intrinsified()) {
2381 IntrinsicCodeGeneratorX86_64 intrinsic(codegen);
2382 intrinsic.Dispatch(invoke);
2383 return true;
2384 }
2385 return false;
2386 }
2387
2388 void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2389 // Explicit clinit checks triggered by static invokes must have been pruned by
2390 // art::PrepareForRegisterAllocation.
2391 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2392
2393 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2394 return;
2395 }
2396
2397 LocationSummary* locations = invoke->GetLocations();
2398 codegen_->GenerateStaticOrDirectCall(
2399 invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
2400 }
2401
2402 void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) {
2403 InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
2404 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
2405 }
2406
2407 void LocationsBuilderX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2408 IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
2409 if (intrinsic.TryDispatch(invoke)) {
2410 return;
2411 }
2412
2413 HandleInvoke(invoke);
2414 }
2415
2416 void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2417 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2418 return;
2419 }
2420
2421 codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
2422 DCHECK(!codegen_->IsLeafMethod());
2423 }
2424
2425 void LocationsBuilderX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
2426 HandleInvoke(invoke);
2427 // Add the hidden argument.
2428 invoke->GetLocations()->AddTemp(Location::RegisterLocation(RAX));
2429 }
2430
2431 void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
2432 // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
2433 LocationSummary* locations = invoke->GetLocations();
2434 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
2435 CpuRegister hidden_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
2436 Location receiver = locations->InAt(0);
2437 size_t class_offset = mirror::Object::ClassOffset().SizeValue();
2438
2439 // Set the hidden argument. It is safe to do this here, as RAX
2440 // won't be modified again before the `call` instruction below.
2441 DCHECK_EQ(RAX, hidden_reg.AsRegister());
2442 codegen_->Load64BitValue(hidden_reg, invoke->GetDexMethodIndex());
2443
2444 if (receiver.IsStackSlot()) {
2445 __ movl(temp, Address(CpuRegister(RSP), receiver.GetStackIndex()));
2446 // /* HeapReference<Class> */ temp = temp->klass_
2447 __ movl(temp, Address(temp, class_offset));
2448 } else {
2449 // /* HeapReference<Class> */ temp = receiver->klass_
2450 __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset));
2451 }
2452 codegen_->MaybeRecordImplicitNullCheck(invoke);
2453 // Instead of simply (possibly) unpoisoning `temp` here, we should
2454 // emit a read barrier for the previous class reference load.
2455 // However this is not required in practice, as this is an
2456 // intermediate/temporary reference and because the current
2457 // concurrent copying collector keeps the from-space memory
2458 // intact/accessible until the end of the marking phase (the
2459 // concurrent copying collector may not do so in the future).
2460 __ MaybeUnpoisonHeapReference(temp);
2461 // temp = temp->GetAddressOfIMT()
2462 __ movq(temp,
2463 Address(temp, mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
2464 // temp = temp->GetImtEntryAt(method_offset);
2465 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
2466 invoke->GetImtIndex(), kX86_64PointerSize));
2468 __ movq(temp, Address(temp, method_offset));
2469 // call temp->GetEntryPoint();
2470 __ call(Address(
2471 temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64PointerSize).SizeValue()));
2472
2473 DCHECK(!codegen_->IsLeafMethod());
2474 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2475 }
2476
2477 void LocationsBuilderX86_64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2478 HandleInvoke(invoke);
2479 }
2480
2481 void InstructionCodeGeneratorX86_64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2482 codegen_->GenerateInvokePolymorphicCall(invoke);
2483 }
2484
2485 void LocationsBuilderX86_64::VisitNeg(HNeg* neg) {
2486 LocationSummary* locations =
2487 new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
2488 switch (neg->GetResultType()) {
2489 case DataType::Type::kInt32:
2490 case DataType::Type::kInt64:
2491 locations->SetInAt(0, Location::RequiresRegister());
2492 locations->SetOut(Location::SameAsFirstInput());
2493 break;
2494
2495 case DataType::Type::kFloat32:
2496 case DataType::Type::kFloat64:
2497 locations->SetInAt(0, Location::RequiresFpuRegister());
2498 locations->SetOut(Location::SameAsFirstInput());
2499 locations->AddTemp(Location::RequiresFpuRegister());
2500 break;
2501
2502 default:
2503 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2504 }
2505 }
2506
2507 void InstructionCodeGeneratorX86_64::VisitNeg(HNeg* neg) {
2508 LocationSummary* locations = neg->GetLocations();
2509 Location out = locations->Out();
2510 Location in = locations->InAt(0);
2511 switch (neg->GetResultType()) {
2512 case DataType::Type::kInt32:
2513 DCHECK(in.IsRegister());
2514 DCHECK(in.Equals(out));
2515 __ negl(out.AsRegister<CpuRegister>());
2516 break;
2517
2518 case DataType::Type::kInt64:
2519 DCHECK(in.IsRegister());
2520 DCHECK(in.Equals(out));
2521 __ negq(out.AsRegister<CpuRegister>());
2522 break;
2523
2524 case DataType::Type::kFloat32: {
2525 DCHECK(in.Equals(out));
2526 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2527 // Implement float negation with an exclusive or with value
2528 // 0x80000000 (mask for bit 31, representing the sign of a
2529 // single-precision floating-point number).
2530 __ movss(mask, codegen_->LiteralInt32Address(0x80000000));
2531 __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
2532 break;
2533 }
2534
2535 case DataType::Type::kFloat64: {
2536 DCHECK(in.Equals(out));
2537 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2538 // Implement double negation with an exclusive or with value
2539 // 0x8000000000000000 (mask for bit 63, representing the sign of
2540 // a double-precision floating-point number).
2541 __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000)));
2542 __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
2543 break;
2544 }
2545
2546 default:
2547 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2548 }
2549 }
2550
2551 void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) {
2552 LocationSummary* locations =
2553 new (GetGraph()->GetAllocator()) LocationSummary(conversion, LocationSummary::kNoCall);
2554 DataType::Type result_type = conversion->GetResultType();
2555 DataType::Type input_type = conversion->GetInputType();
2556 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
2557 << input_type << " -> " << result_type;
2558
2559 switch (result_type) {
2560 case DataType::Type::kUint8:
2561 case DataType::Type::kInt8:
2562 case DataType::Type::kUint16:
2563 case DataType::Type::kInt16:
2564 DCHECK(DataType::IsIntegralType(input_type)) << input_type;
2565 locations->SetInAt(0, Location::Any());
2566 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2567 break;
2568
2569 case DataType::Type::kInt32:
2570 switch (input_type) {
2571 case DataType::Type::kInt64:
2572 locations->SetInAt(0, Location::Any());
2573 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2574 break;
2575
2576 case DataType::Type::kFloat32:
2577 locations->SetInAt(0, Location::RequiresFpuRegister());
2578 locations->SetOut(Location::RequiresRegister());
2579 break;
2580
2581 case DataType::Type::kFloat64:
2582 locations->SetInAt(0, Location::RequiresFpuRegister());
2583 locations->SetOut(Location::RequiresRegister());
2584 break;
2585
2586 default:
2587 LOG(FATAL) << "Unexpected type conversion from " << input_type
2588 << " to " << result_type;
2589 }
2590 break;
2591
2592 case DataType::Type::kInt64:
2593 switch (input_type) {
2594 case DataType::Type::kBool:
2595 case DataType::Type::kUint8:
2596 case DataType::Type::kInt8:
2597 case DataType::Type::kUint16:
2598 case DataType::Type::kInt16:
2599 case DataType::Type::kInt32:
2600 // TODO: We would benefit from a (to-be-implemented)
2601 // Location::RegisterOrStackSlot requirement for this input.
2602 locations->SetInAt(0, Location::RequiresRegister());
2603 locations->SetOut(Location::RequiresRegister());
2604 break;
2605
2606 case DataType::Type::kFloat32:
2607 locations->SetInAt(0, Location::RequiresFpuRegister());
2608 locations->SetOut(Location::RequiresRegister());
2609 break;
2610
2611 case DataType::Type::kFloat64:
2612 locations->SetInAt(0, Location::RequiresFpuRegister());
2613 locations->SetOut(Location::RequiresRegister());
2614 break;
2615
2616 default:
2617 LOG(FATAL) << "Unexpected type conversion from " << input_type
2618 << " to " << result_type;
2619 }
2620 break;
2621
2622 case DataType::Type::kFloat32:
2623 switch (input_type) {
2624 case DataType::Type::kBool:
2625 case DataType::Type::kUint8:
2626 case DataType::Type::kInt8:
2627 case DataType::Type::kUint16:
2628 case DataType::Type::kInt16:
2629 case DataType::Type::kInt32:
2630 locations->SetInAt(0, Location::Any());
2631 locations->SetOut(Location::RequiresFpuRegister());
2632 break;
2633
2634 case DataType::Type::kInt64:
2635 locations->SetInAt(0, Location::Any());
2636 locations->SetOut(Location::RequiresFpuRegister());
2637 break;
2638
2639 case DataType::Type::kFloat64:
2640 locations->SetInAt(0, Location::Any());
2641 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2642 break;
2643
2644 default:
2645 LOG(FATAL) << "Unexpected type conversion from " << input_type
2646 << " to " << result_type;
2647 }
2648 break;
2649
2650 case DataType::Type::kFloat64:
2651 switch (input_type) {
2652 case DataType::Type::kBool:
2653 case DataType::Type::kUint8:
2654 case DataType::Type::kInt8:
2655 case DataType::Type::kUint16:
2656 case DataType::Type::kInt16:
2657 case DataType::Type::kInt32:
2658 locations->SetInAt(0, Location::Any());
2659 locations->SetOut(Location::RequiresFpuRegister());
2660 break;
2661
2662 case DataType::Type::kInt64:
2663 locations->SetInAt(0, Location::Any());
2664 locations->SetOut(Location::RequiresFpuRegister());
2665 break;
2666
2667 case DataType::Type::kFloat32:
2668 locations->SetInAt(0, Location::Any());
2669 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2670 break;
2671
2672 default:
2673 LOG(FATAL) << "Unexpected type conversion from " << input_type
2674 << " to " << result_type;
2675 }
2676 break;
2677
2678 default:
2679 LOG(FATAL) << "Unexpected type conversion from " << input_type
2680 << " to " << result_type;
2681 }
2682 }
2683
2684 void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conversion) {
2685 LocationSummary* locations = conversion->GetLocations();
2686 Location out = locations->Out();
2687 Location in = locations->InAt(0);
2688 DataType::Type result_type = conversion->GetResultType();
2689 DataType::Type input_type = conversion->GetInputType();
2690 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
2691 << input_type << " -> " << result_type;
2692 switch (result_type) {
2693 case DataType::Type::kUint8:
2694 switch (input_type) {
2695 case DataType::Type::kInt8:
2696 case DataType::Type::kUint16:
2697 case DataType::Type::kInt16:
2698 case DataType::Type::kInt32:
2699 case DataType::Type::kInt64:
2700 if (in.IsRegister()) {
2701 __ movzxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2702 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
2703 __ movzxb(out.AsRegister<CpuRegister>(),
2704 Address(CpuRegister(RSP), in.GetStackIndex()));
2705 } else {
2706 __ movl(out.AsRegister<CpuRegister>(),
2707 Immediate(static_cast<uint8_t>(Int64FromConstant(in.GetConstant()))));
2708 }
2709 break;
2710
2711 default:
2712 LOG(FATAL) << "Unexpected type conversion from " << input_type
2713 << " to " << result_type;
2714 }
2715 break;
2716
2717 case DataType::Type::kInt8:
2718 switch (input_type) {
2719 case DataType::Type::kUint8:
2720 case DataType::Type::kUint16:
2721 case DataType::Type::kInt16:
2722 case DataType::Type::kInt32:
2723 case DataType::Type::kInt64:
2724 if (in.IsRegister()) {
2725 __ movsxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2726 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
2727 __ movsxb(out.AsRegister<CpuRegister>(),
2728 Address(CpuRegister(RSP), in.GetStackIndex()));
2729 } else {
2730 __ movl(out.AsRegister<CpuRegister>(),
2731 Immediate(static_cast<int8_t>(Int64FromConstant(in.GetConstant()))));
2732 }
2733 break;
2734
2735 default:
2736 LOG(FATAL) << "Unexpected type conversion from " << input_type
2737 << " to " << result_type;
2738 }
2739 break;
2740
2741 case DataType::Type::kUint16:
2742 switch (input_type) {
2743 case DataType::Type::kInt8:
2744 case DataType::Type::kInt16:
2745 case DataType::Type::kInt32:
2746 case DataType::Type::kInt64:
2747 if (in.IsRegister()) {
2748 __ movzxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2749 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
2750 __ movzxw(out.AsRegister<CpuRegister>(),
2751 Address(CpuRegister(RSP), in.GetStackIndex()));
2752 } else {
2753 __ movl(out.AsRegister<CpuRegister>(),
2754 Immediate(static_cast<uint16_t>(Int64FromConstant(in.GetConstant()))));
2755 }
2756 break;
2757
2758 default:
2759 LOG(FATAL) << "Unexpected type conversion from " << input_type
2760 << " to " << result_type;
2761 }
2762 break;
2763
2764 case DataType::Type::kInt16:
2765 switch (input_type) {
2766 case DataType::Type::kUint16:
2767 case DataType::Type::kInt32:
2768 case DataType::Type::kInt64:
2769 if (in.IsRegister()) {
2770 __ movsxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2771 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
2772 __ movsxw(out.AsRegister<CpuRegister>(),
2773 Address(CpuRegister(RSP), in.GetStackIndex()));
2774 } else {
2775 __ movl(out.AsRegister<CpuRegister>(),
2776 Immediate(static_cast<int16_t>(Int64FromConstant(in.GetConstant()))));
2777 }
2778 break;
2779
2780 default:
2781 LOG(FATAL) << "Unexpected type conversion from " << input_type
2782 << " to " << result_type;
2783 }
2784 break;
2785
2786 case DataType::Type::kInt32:
2787 switch (input_type) {
2788 case DataType::Type::kInt64:
2789 if (in.IsRegister()) {
2790 __ movl(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2791 } else if (in.IsDoubleStackSlot()) {
2792 __ movl(out.AsRegister<CpuRegister>(),
2793 Address(CpuRegister(RSP), in.GetStackIndex()));
2794 } else {
2795 DCHECK(in.IsConstant());
2796 DCHECK(in.GetConstant()->IsLongConstant());
2797 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
2798 __ movl(out.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
2799 }
2800 break;
2801
2802 case DataType::Type::kFloat32: {
2803 XmmRegister input = in.AsFpuRegister<XmmRegister>();
2804 CpuRegister output = out.AsRegister<CpuRegister>();
2805 NearLabel done, nan;
2806
2807 __ movl(output, Immediate(kPrimIntMax));
2808 // if input >= (float)INT_MAX goto done
2809 __ comiss(input, codegen_->LiteralFloatAddress(kPrimIntMax));
2810 __ j(kAboveEqual, &done);
2811 // if input is NaN goto nan
2812 __ j(kUnordered, &nan);
2813 // output = float-to-int-truncate(input)
2814 __ cvttss2si(output, input, false);
2815 __ jmp(&done);
2816 __ Bind(&nan);
2817 // output = 0
2818 __ xorl(output, output);
2819 __ Bind(&done);
2820 break;
2821 }
2822
2823 case DataType::Type::kFloat64: {
2824 XmmRegister input = in.AsFpuRegister<XmmRegister>();
2825 CpuRegister output = out.AsRegister<CpuRegister>();
2826 NearLabel done, nan;
2827
2828 __ movl(output, Immediate(kPrimIntMax));
2829 // if input >= (double)INT_MAX goto done
2830 __ comisd(input, codegen_->LiteralDoubleAddress(kPrimIntMax));
2831 __ j(kAboveEqual, &done);
2832 // if input is NaN goto nan
2833 __ j(kUnordered, &nan);
2834 // output = double-to-int-truncate(input)
2835 __ cvttsd2si(output, input);
2836 __ jmp(&done);
2837 __ Bind(&nan);
2838 // output = 0
2839 __ xorl(output, output);
2840 __ Bind(&done);
2841 break;
2842 }
2843
2844 default:
2845 LOG(FATAL) << "Unexpected type conversion from " << input_type
2846 << " to " << result_type;
2847 }
2848 break;
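// Note on the two float/double-to-int cases above: they implement Java's f2i/d2i narrowing
// rules. A rough sketch of the equivalent logic (illustrative only, not emitted verbatim):
//
//   int32_t FloatToInt(float in) {
//     if (in != in) return 0;                        // NaN converts to 0.
//     if (in >= (float)INT32_MAX) return INT32_MAX;  // Saturate large positive values.
//     // cvttss2si truncates toward zero; for in < INT32_MIN it yields 0x80000000 == INT32_MIN,
//     // which is exactly what Java requires.
//     return (int32_t)in;
//   }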
2849
2850 case DataType::Type::kInt64:
2851 DCHECK(out.IsRegister());
2852 switch (input_type) {
2853 case DataType::Type::kBool:
2854 case DataType::Type::kUint8:
2855 case DataType::Type::kInt8:
2856 case DataType::Type::kUint16:
2857 case DataType::Type::kInt16:
2858 case DataType::Type::kInt32:
2859 DCHECK(in.IsRegister());
2860 __ movsxd(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2861 break;
2862
2863 case DataType::Type::kFloat32: {
2864 XmmRegister input = in.AsFpuRegister<XmmRegister>();
2865 CpuRegister output = out.AsRegister<CpuRegister>();
2866 NearLabel done, nan;
2867
2868 codegen_->Load64BitValue(output, kPrimLongMax);
2869 // if input >= (float)LONG_MAX goto done
2870 __ comiss(input, codegen_->LiteralFloatAddress(kPrimLongMax));
2871 __ j(kAboveEqual, &done);
2872 // if input is NaN goto nan
2873 __ j(kUnordered, &nan);
2874 // output = float-to-long-truncate(input)
2875 __ cvttss2si(output, input, true);
2876 __ jmp(&done);
2877 __ Bind(&nan);
2878 // output = 0
2879 __ xorl(output, output);
2880 __ Bind(&done);
2881 break;
2882 }
2883
2884 case DataType::Type::kFloat64: {
2885 XmmRegister input = in.AsFpuRegister<XmmRegister>();
2886 CpuRegister output = out.AsRegister<CpuRegister>();
2887 NearLabel done, nan;
2888
2889 codegen_->Load64BitValue(output, kPrimLongMax);
2890 // if input >= (double)LONG_MAX goto done
2891 __ comisd(input, codegen_->LiteralDoubleAddress(kPrimLongMax));
2892 __ j(kAboveEqual, &done);
2893 // if input is NaN goto nan
2894 __ j(kUnordered, &nan);
2895 // output = double-to-long-truncate(input)
2896 __ cvttsd2si(output, input, true);
2897 __ jmp(&done);
2898 __ Bind(&nan);
2899 // output = 0
2900 __ xorl(output, output);
2901 __ Bind(&done);
2902 break;
2903 }
2904
2905 default:
2906 LOG(FATAL) << "Unexpected type conversion from " << input_type
2907 << " to " << result_type;
2908 }
2909 break;
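// The kInt64 float/double cases above follow the same pattern with 64-bit constants: NaN
// becomes 0, values >= (float/double)Long.MAX_VALUE saturate to kPrimLongMax, and the 64-bit
// cvttss2si/cvttsd2si forms return 0x8000000000000000 (== Long.MIN_VALUE) for inputs below the
// representable range, which already matches Java's f2l/d2l semantics.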
2910
2911 case DataType::Type::kFloat32:
2912 switch (input_type) {
2913 case DataType::Type::kBool:
2914 case DataType::Type::kUint8:
2915 case DataType::Type::kInt8:
2916 case DataType::Type::kUint16:
2917 case DataType::Type::kInt16:
2918 case DataType::Type::kInt32:
2919 if (in.IsRegister()) {
2920 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
2921 } else if (in.IsConstant()) {
2922 int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
2923 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
2924 codegen_->Load32BitValue(dest, static_cast<float>(v));
2925 } else {
2926 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
2927 Address(CpuRegister(RSP), in.GetStackIndex()), false);
2928 }
2929 break;
2930
2931 case DataType::Type::kInt64:
2932 if (in.IsRegister()) {
2933 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
2934 } else if (in.IsConstant()) {
2935 int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
2936 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
2937 codegen_->Load32BitValue(dest, static_cast<float>(v));
2938 } else {
2939 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
2940 Address(CpuRegister(RSP), in.GetStackIndex()), true);
2941 }
2942 break;
2943
2944 case DataType::Type::kFloat64:
2945 if (in.IsFpuRegister()) {
2946 __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
2947 } else if (in.IsConstant()) {
2948 double v = in.GetConstant()->AsDoubleConstant()->GetValue();
2949 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
2950 codegen_->Load32BitValue(dest, static_cast<float>(v));
2951 } else {
2952 __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(),
2953 Address(CpuRegister(RSP), in.GetStackIndex()));
2954 }
2955 break;
2956
2957 default:
2958 LOG(FATAL) << "Unexpected type conversion from " << input_type
2959 << " to " << result_type;
2960 }
2961 break;
2962
2963 case DataType::Type::kFloat64:
2964 switch (input_type) {
2965 case DataType::Type::kBool:
2966 case DataType::Type::kUint8:
2967 case DataType::Type::kInt8:
2968 case DataType::Type::kUint16:
2969 case DataType::Type::kInt16:
2970 case DataType::Type::kInt32:
2971 if (in.IsRegister()) {
2972 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
2973 } else if (in.IsConstant()) {
2974 int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
2975 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
2976 codegen_->Load64BitValue(dest, static_cast<double>(v));
2977 } else {
2978 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
2979 Address(CpuRegister(RSP), in.GetStackIndex()), false);
2980 }
2981 break;
2982
2983 case DataType::Type::kInt64:
2984 if (in.IsRegister()) {
2985 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
2986 } else if (in.IsConstant()) {
2987 int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
2988 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
2989 codegen_->Load64BitValue(dest, static_cast<double>(v));
2990 } else {
2991 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
2992 Address(CpuRegister(RSP), in.GetStackIndex()), true);
2993 }
2994 break;
2995
2996 case DataType::Type::kFloat32:
2997 if (in.IsFpuRegister()) {
2998 __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
2999 } else if (in.IsConstant()) {
3000 float v = in.GetConstant()->AsFloatConstant()->GetValue();
3001 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3002 codegen_->Load64BitValue(dest, static_cast<double>(v));
3003 } else {
3004 __ cvtss2sd(out.AsFpuRegister<XmmRegister>(),
3005 Address(CpuRegister(RSP), in.GetStackIndex()));
3006 }
3007 break;
3008
3009 default:
3010 LOG(FATAL) << "Unexpected type conversion from " << input_type
3011 << " to " << result_type;
3012 }
3013 break;
3014
3015 default:
3016 LOG(FATAL) << "Unexpected type conversion from " << input_type
3017 << " to " << result_type;
3018 }
3019 }
3020
3021 void LocationsBuilderX86_64::VisitAdd(HAdd* add) {
3022 LocationSummary* locations =
3023 new (GetGraph()->GetAllocator()) LocationSummary(add, LocationSummary::kNoCall);
3024 switch (add->GetResultType()) {
3025 case DataType::Type::kInt32: {
3026 locations->SetInAt(0, Location::RequiresRegister());
3027 locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
3028 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3029 break;
3030 }
3031
3032 case DataType::Type::kInt64: {
3033 locations->SetInAt(0, Location::RequiresRegister());
3034 // We can use a leaq or addq if the constant can fit in an immediate.
3035 locations->SetInAt(1, Location::RegisterOrInt32Constant(add->InputAt(1)));
3036 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3037 break;
3038 }
3039
3040 case DataType::Type::kFloat64:
3041 case DataType::Type::kFloat32: {
3042 locations->SetInAt(0, Location::RequiresFpuRegister());
3043 locations->SetInAt(1, Location::Any());
3044 locations->SetOut(Location::SameAsFirstInput());
3045 break;
3046 }
3047
3048 default:
3049 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3050 }
3051 }
3052
3053 void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) {
3054 LocationSummary* locations = add->GetLocations();
3055 Location first = locations->InAt(0);
3056 Location second = locations->InAt(1);
3057 Location out = locations->Out();
3058
3059 switch (add->GetResultType()) {
3060 case DataType::Type::kInt32: {
3061 if (second.IsRegister()) {
3062 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3063 __ addl(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3064 } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3065 __ addl(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
3066 } else {
3067 __ leal(out.AsRegister<CpuRegister>(), Address(
3068 first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
3069 }
3070 } else if (second.IsConstant()) {
3071 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3072 __ addl(out.AsRegister<CpuRegister>(),
3073 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
3074 } else {
3075 __ leal(out.AsRegister<CpuRegister>(), Address(
3076 first.AsRegister<CpuRegister>(), second.GetConstant()->AsIntConstant()->GetValue()));
3077 }
3078 } else {
3079 DCHECK(first.Equals(locations->Out()));
3080 __ addl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
3081 }
3082 break;
3083 }
3084
3085 case DataType::Type::kInt64: {
3086 if (second.IsRegister()) {
3087 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3088 __ addq(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3089 } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3090 __ addq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
3091 } else {
3092 __ leaq(out.AsRegister<CpuRegister>(), Address(
3093 first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
3094 }
3095 } else {
3096 DCHECK(second.IsConstant());
3097 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3098 int32_t int32_value = Low32Bits(value);
3099 DCHECK_EQ(int32_value, value);
3100 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3101 __ addq(out.AsRegister<CpuRegister>(), Immediate(int32_value));
3102 } else {
3103 __ leaq(out.AsRegister<CpuRegister>(), Address(
3104 first.AsRegister<CpuRegister>(), int32_value));
3105 }
3106 }
3107 break;
3108 }
3109
3110 case DataType::Type::kFloat32: {
3111 if (second.IsFpuRegister()) {
3112 __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3113 } else if (second.IsConstant()) {
3114 __ addss(first.AsFpuRegister<XmmRegister>(),
3115 codegen_->LiteralFloatAddress(
3116 second.GetConstant()->AsFloatConstant()->GetValue()));
3117 } else {
3118 DCHECK(second.IsStackSlot());
3119 __ addss(first.AsFpuRegister<XmmRegister>(),
3120 Address(CpuRegister(RSP), second.GetStackIndex()));
3121 }
3122 break;
3123 }
3124
3125 case DataType::Type::kFloat64: {
3126 if (second.IsFpuRegister()) {
3127 __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3128 } else if (second.IsConstant()) {
3129 __ addsd(first.AsFpuRegister<XmmRegister>(),
3130 codegen_->LiteralDoubleAddress(
3131 second.GetConstant()->AsDoubleConstant()->GetValue()));
3132 } else {
3133 DCHECK(second.IsDoubleStackSlot());
3134 __ addsd(first.AsFpuRegister<XmmRegister>(),
3135 Address(CpuRegister(RSP), second.GetStackIndex()));
3136 }
3137 break;
3138 }
3139
3140 default:
3141 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3142 }
3143 }
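// Note on the integer cases above: when the register allocator assigns the output a register
// distinct from both inputs, leal/leaq performs a non-destructive three-operand add. Possible
// emitted forms (illustrative, assuming inputs in RDI/RSI and the output in RAX):
//
//   leal (%rdi,%rsi,1), %eax   // out = first + second
//   leal 12(%rdi), %eax        // out = first + 12
//
// Unlike addl/addq, lea also leaves EFLAGS untouched.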
3144
3145 void LocationsBuilderX86_64::VisitSub(HSub* sub) {
3146 LocationSummary* locations =
3147 new (GetGraph()->GetAllocator()) LocationSummary(sub, LocationSummary::kNoCall);
3148 switch (sub->GetResultType()) {
3149 case DataType::Type::kInt32: {
3150 locations->SetInAt(0, Location::RequiresRegister());
3151 locations->SetInAt(1, Location::Any());
3152 locations->SetOut(Location::SameAsFirstInput());
3153 break;
3154 }
3155 case DataType::Type::kInt64: {
3156 locations->SetInAt(0, Location::RequiresRegister());
3157 locations->SetInAt(1, Location::RegisterOrInt32Constant(sub->InputAt(1)));
3158 locations->SetOut(Location::SameAsFirstInput());
3159 break;
3160 }
3161 case DataType::Type::kFloat32:
3162 case DataType::Type::kFloat64: {
3163 locations->SetInAt(0, Location::RequiresFpuRegister());
3164 locations->SetInAt(1, Location::Any());
3165 locations->SetOut(Location::SameAsFirstInput());
3166 break;
3167 }
3168 default:
3169 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3170 }
3171 }
3172
3173 void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) {
3174 LocationSummary* locations = sub->GetLocations();
3175 Location first = locations->InAt(0);
3176 Location second = locations->InAt(1);
3177 DCHECK(first.Equals(locations->Out()));
3178 switch (sub->GetResultType()) {
3179 case DataType::Type::kInt32: {
3180 if (second.IsRegister()) {
3181 __ subl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3182 } else if (second.IsConstant()) {
3183 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
3184 __ subl(first.AsRegister<CpuRegister>(), imm);
3185 } else {
3186 __ subl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
3187 }
3188 break;
3189 }
3190 case DataType::Type::kInt64: {
3191 if (second.IsConstant()) {
3192 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3193 DCHECK(IsInt<32>(value));
3194 __ subq(first.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
3195 } else {
3196 __ subq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3197 }
3198 break;
3199 }
3200
3201 case DataType::Type::kFloat32: {
3202 if (second.IsFpuRegister()) {
3203 __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3204 } else if (second.IsConstant()) {
3205 __ subss(first.AsFpuRegister<XmmRegister>(),
3206 codegen_->LiteralFloatAddress(
3207 second.GetConstant()->AsFloatConstant()->GetValue()));
3208 } else {
3209 DCHECK(second.IsStackSlot());
3210 __ subss(first.AsFpuRegister<XmmRegister>(),
3211 Address(CpuRegister(RSP), second.GetStackIndex()));
3212 }
3213 break;
3214 }
3215
3216 case DataType::Type::kFloat64: {
3217 if (second.IsFpuRegister()) {
3218 __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3219 } else if (second.IsConstant()) {
3220 __ subsd(first.AsFpuRegister<XmmRegister>(),
3221 codegen_->LiteralDoubleAddress(
3222 second.GetConstant()->AsDoubleConstant()->GetValue()));
3223 } else {
3224 DCHECK(second.IsDoubleStackSlot());
3225 __ subsd(first.AsFpuRegister<XmmRegister>(),
3226 Address(CpuRegister(RSP), second.GetStackIndex()));
3227 }
3228 break;
3229 }
3230
3231 default:
3232 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3233 }
3234 }
3235
3236 void LocationsBuilderX86_64::VisitMul(HMul* mul) {
3237 LocationSummary* locations =
3238 new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
3239 switch (mul->GetResultType()) {
3240 case DataType::Type::kInt32: {
3241 locations->SetInAt(0, Location::RequiresRegister());
3242 locations->SetInAt(1, Location::Any());
3243 if (mul->InputAt(1)->IsIntConstant()) {
3244 // Can use 3 operand multiply.
3245 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3246 } else {
3247 locations->SetOut(Location::SameAsFirstInput());
3248 }
3249 break;
3250 }
3251 case DataType::Type::kInt64: {
3252 locations->SetInAt(0, Location::RequiresRegister());
3253 locations->SetInAt(1, Location::Any());
3254 if (mul->InputAt(1)->IsLongConstant() &&
3255 IsInt<32>(mul->InputAt(1)->AsLongConstant()->GetValue())) {
3256 // Can use 3 operand multiply.
3257 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3258 } else {
3259 locations->SetOut(Location::SameAsFirstInput());
3260 }
3261 break;
3262 }
3263 case DataType::Type::kFloat32:
3264 case DataType::Type::kFloat64: {
3265 locations->SetInAt(0, Location::RequiresFpuRegister());
3266 locations->SetInAt(1, Location::Any());
3267 locations->SetOut(Location::SameAsFirstInput());
3268 break;
3269 }
3270
3271 default:
3272 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3273 }
3274 }
3275
3276 void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) {
3277 LocationSummary* locations = mul->GetLocations();
3278 Location first = locations->InAt(0);
3279 Location second = locations->InAt(1);
3280 Location out = locations->Out();
3281 switch (mul->GetResultType()) {
3282 case DataType::Type::kInt32:
3283 // The constant may have ended up in a register, so test explicitly to avoid
3284 // problems where the output may not be the same as the first operand.
3285 if (mul->InputAt(1)->IsIntConstant()) {
3286 Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
3287 __ imull(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), imm);
3288 } else if (second.IsRegister()) {
3289 DCHECK(first.Equals(out));
3290 __ imull(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3291 } else {
3292 DCHECK(first.Equals(out));
3293 DCHECK(second.IsStackSlot());
3294 __ imull(first.AsRegister<CpuRegister>(),
3295 Address(CpuRegister(RSP), second.GetStackIndex()));
3296 }
3297 break;
3298 case DataType::Type::kInt64: {
3299 // The constant may have ended up in a register, so test explicitly to avoid
3300 // problems where the output may not be the same as the first operand.
3301 if (mul->InputAt(1)->IsLongConstant()) {
3302 int64_t value = mul->InputAt(1)->AsLongConstant()->GetValue();
3303 if (IsInt<32>(value)) {
3304 __ imulq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(),
3305 Immediate(static_cast<int32_t>(value)));
3306 } else {
3307 // Have to use the constant area.
3308 DCHECK(first.Equals(out));
3309 __ imulq(first.AsRegister<CpuRegister>(), codegen_->LiteralInt64Address(value));
3310 }
3311 } else if (second.IsRegister()) {
3312 DCHECK(first.Equals(out));
3313 __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3314 } else {
3315 DCHECK(second.IsDoubleStackSlot());
3316 DCHECK(first.Equals(out));
3317 __ imulq(first.AsRegister<CpuRegister>(),
3318 Address(CpuRegister(RSP), second.GetStackIndex()));
3319 }
3320 break;
3321 }
3322
3323 case DataType::Type::kFloat32: {
3324 DCHECK(first.Equals(out));
3325 if (second.IsFpuRegister()) {
3326 __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3327 } else if (second.IsConstant()) {
3328 __ mulss(first.AsFpuRegister<XmmRegister>(),
3329 codegen_->LiteralFloatAddress(
3330 second.GetConstant()->AsFloatConstant()->GetValue()));
3331 } else {
3332 DCHECK(second.IsStackSlot());
3333 __ mulss(first.AsFpuRegister<XmmRegister>(),
3334 Address(CpuRegister(RSP), second.GetStackIndex()));
3335 }
3336 break;
3337 }
3338
3339 case DataType::Type::kFloat64: {
3340 DCHECK(first.Equals(out));
3341 if (second.IsFpuRegister()) {
3342 __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3343 } else if (second.IsConstant()) {
3344 __ mulsd(first.AsFpuRegister<XmmRegister>(),
3345 codegen_->LiteralDoubleAddress(
3346 second.GetConstant()->AsDoubleConstant()->GetValue()));
3347 } else {
3348 DCHECK(second.IsDoubleStackSlot());
3349 __ mulsd(first.AsFpuRegister<XmmRegister>(),
3350 Address(CpuRegister(RSP), second.GetStackIndex()));
3351 }
3352 break;
3353 }
3354
3355 default:
3356 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3357 }
3358 }
3359
3360 void InstructionCodeGeneratorX86_64::PushOntoFPStack(Location source, uint32_t temp_offset,
3361 uint32_t stack_adjustment, bool is_float) {
3362 if (source.IsStackSlot()) {
3363 DCHECK(is_float);
3364 __ flds(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
3365 } else if (source.IsDoubleStackSlot()) {
3366 DCHECK(!is_float);
3367 __ fldl(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
3368 } else {
3369 // Write the value to the temporary location on the stack and load to FP stack.
3370 if (is_float) {
3371 Location stack_temp = Location::StackSlot(temp_offset);
3372 codegen_->Move(stack_temp, source);
3373 __ flds(Address(CpuRegister(RSP), temp_offset));
3374 } else {
3375 Location stack_temp = Location::DoubleStackSlot(temp_offset);
3376 codegen_->Move(stack_temp, source);
3377 __ fldl(Address(CpuRegister(RSP), temp_offset));
3378 }
3379 }
3380 }
3381
3382 void InstructionCodeGeneratorX86_64::GenerateRemFP(HRem* rem) {
3383 DataType::Type type = rem->GetResultType();
3384 bool is_float = type == DataType::Type::kFloat32;
3385 size_t elem_size = DataType::Size(type);
3386 LocationSummary* locations = rem->GetLocations();
3387 Location first = locations->InAt(0);
3388 Location second = locations->InAt(1);
3389 Location out = locations->Out();
3390
3391 // Create stack space for 2 elements.
3392 // TODO: enhance register allocator to ask for stack temporaries.
3393 __ subq(CpuRegister(RSP), Immediate(2 * elem_size));
3394
3395 // Load the values to the FP stack in reverse order, using temporaries if needed.
3396 PushOntoFPStack(second, elem_size, 2 * elem_size, is_float);
3397 PushOntoFPStack(first, 0, 2 * elem_size, is_float);
3398
3399 // Loop doing FPREM until we stabilize.
3400 NearLabel retry;
3401 __ Bind(&retry);
3402 __ fprem();
3403
3404 // Move FP status to AX.
3405 __ fstsw();
3406
3407 // And see if the argument reduction is complete. This is signaled by the
3408 // C2 FPU status flag being cleared (set to 0).
3409 __ andl(CpuRegister(RAX), Immediate(kC2ConditionMask));
3410 __ j(kNotEqual, &retry);
3411
3412 // We have settled on the final value. Retrieve it into an XMM register.
3413 // Store FP top of stack to real stack.
3414 if (is_float) {
3415 __ fsts(Address(CpuRegister(RSP), 0));
3416 } else {
3417 __ fstl(Address(CpuRegister(RSP), 0));
3418 }
3419
3420 // Pop the 2 items from the FP stack.
3421 __ fucompp();
3422
3423 // Load the value from the stack into an XMM register.
3424 DCHECK(out.IsFpuRegister()) << out;
3425 if (is_float) {
3426 __ movss(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
3427 } else {
3428 __ movsd(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
3429 }
3430
3431 // And remove the temporary stack space we allocated.
3432 __ addq(CpuRegister(RSP), Immediate(2 * elem_size));
3433 }
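// The fprem loop above computes the truncated remainder, i.e. the equivalent of
// std::fmod(first, second), which is what Java's % operator requires for float/double
// (e.g. 5.5 % 2.0 == 1.5 and -5.5 % 2.0 == -1.5, taking the sign of the dividend). fprem may
// deliver only a partial remainder per pass, signalled by the C2 status flag, so the
// instruction is re-issued until the reduction is complete.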
3434
3435 void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
3436 DCHECK(instruction->IsDiv() || instruction->IsRem());
3437
3438 LocationSummary* locations = instruction->GetLocations();
3439 Location second = locations->InAt(1);
3440 DCHECK(second.IsConstant());
3441
3442 CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
3443 CpuRegister input_register = locations->InAt(0).AsRegister<CpuRegister>();
3444 int64_t imm = Int64FromConstant(second.GetConstant());
3445
3446 DCHECK(imm == 1 || imm == -1);
3447
3448 switch (instruction->GetResultType()) {
3449 case DataType::Type::kInt32: {
3450 if (instruction->IsRem()) {
3451 __ xorl(output_register, output_register);
3452 } else {
3453 __ movl(output_register, input_register);
3454 if (imm == -1) {
3455 __ negl(output_register);
3456 }
3457 }
3458 break;
3459 }
3460
3461 case DataType::Type::kInt64: {
3462 if (instruction->IsRem()) {
3463 __ xorl(output_register, output_register);
3464 } else {
3465 __ movq(output_register, input_register);
3466 if (imm == -1) {
3467 __ negq(output_register);
3468 }
3469 }
3470 break;
3471 }
3472
3473 default:
3474 LOG(FATAL) << "Unexpected type for div by (-)1 " << instruction->GetResultType();
3475 }
3476 }
3477
3478 void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
3479 LocationSummary* locations = instruction->GetLocations();
3480 Location second = locations->InAt(1);
3481
3482 CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
3483 CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
3484
3485 int64_t imm = Int64FromConstant(second.GetConstant());
3486 DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
3487 uint64_t abs_imm = AbsOrMin(imm);
3488
3489 CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
3490
3491 if (instruction->GetResultType() == DataType::Type::kInt32) {
3492 __ leal(tmp, Address(numerator, abs_imm - 1));
3493 __ testl(numerator, numerator);
3494 __ cmov(kGreaterEqual, tmp, numerator);
3495 int shift = CTZ(imm);
3496 __ sarl(tmp, Immediate(shift));
3497
3498 if (imm < 0) {
3499 __ negl(tmp);
3500 }
3501
3502 __ movl(output_register, tmp);
3503 } else {
3504 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
3505 CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
3506
3507 codegen_->Load64BitValue(rdx, abs_imm - 1);
3508 __ addq(rdx, numerator);
3509 __ testq(numerator, numerator);
3510 __ cmov(kGreaterEqual, rdx, numerator);
3511 int shift = CTZ(imm);
3512 __ sarq(rdx, Immediate(shift));
3513
3514 if (imm < 0) {
3515 __ negq(rdx);
3516 }
3517
3518 __ movq(output_register, rdx);
3519 }
3520 }
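// The sequence above implements truncated (round-toward-zero) division by a power of two: a
// plain arithmetic shift rounds toward negative infinity, so a bias of (abs_imm - 1) is added
// first, but only for negative numerators (via the cmov). Worked example, assuming imm == 8
// (abs_imm == 8, shift == 3):
//
//   numerator == -9:  tmp = -9 + 7 = -2;  -2 >> 3 == -1   (correct: -9 / 8 == -1)
//   numerator ==  9:  cmov keeps tmp = 9;  9 >> 3 ==  1   (correct:  9 / 8 ==  1)
//
// A bare shift would have produced -9 >> 3 == -2 in the first case. For a negative divisor the
// result is additionally negated.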
3521
3522 void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
3523 DCHECK(instruction->IsDiv() || instruction->IsRem());
3524
3525 LocationSummary* locations = instruction->GetLocations();
3526 Location second = locations->InAt(1);
3527
3528 CpuRegister numerator = instruction->IsDiv() ? locations->GetTemp(1).AsRegister<CpuRegister>()
3529 : locations->GetTemp(0).AsRegister<CpuRegister>();
3530 CpuRegister eax = locations->InAt(0).AsRegister<CpuRegister>();
3531 CpuRegister edx = instruction->IsDiv() ? locations->GetTemp(0).AsRegister<CpuRegister>()
3532 : locations->Out().AsRegister<CpuRegister>();
3533 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3534
3535 DCHECK_EQ(RAX, eax.AsRegister());
3536 DCHECK_EQ(RDX, edx.AsRegister());
3537 if (instruction->IsDiv()) {
3538 DCHECK_EQ(RAX, out.AsRegister());
3539 } else {
3540 DCHECK_EQ(RDX, out.AsRegister());
3541 }
3542
3543 int64_t magic;
3544 int shift;
3545
3546 // TODO: can these branches be written as one?
3547 if (instruction->GetResultType() == DataType::Type::kInt32) {
3548 int imm = second.GetConstant()->AsIntConstant()->GetValue();
3549
3550 CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift);
3551
3552 __ movl(numerator, eax);
3553
3554 __ movl(eax, Immediate(magic));
3555 __ imull(numerator);
3556
3557 if (imm > 0 && magic < 0) {
3558 __ addl(edx, numerator);
3559 } else if (imm < 0 && magic > 0) {
3560 __ subl(edx, numerator);
3561 }
3562
3563 if (shift != 0) {
3564 __ sarl(edx, Immediate(shift));
3565 }
3566
3567 __ movl(eax, edx);
3568 __ shrl(edx, Immediate(31));
3569 __ addl(edx, eax);
3570
3571 if (instruction->IsRem()) {
3572 __ movl(eax, numerator);
3573 __ imull(edx, Immediate(imm));
3574 __ subl(eax, edx);
3575 __ movl(edx, eax);
3576 } else {
3577 __ movl(eax, edx);
3578 }
3579 } else {
3580 int64_t imm = second.GetConstant()->AsLongConstant()->GetValue();
3581
3582 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
3583
3584 CpuRegister rax = eax;
3585 CpuRegister rdx = edx;
3586
3587 CalculateMagicAndShiftForDivRem(imm, true /* is_long */, &magic, &shift);
3588
3589 // Save the numerator.
3590 __ movq(numerator, rax);
3591
3592 // RAX = magic
3593 codegen_->Load64BitValue(rax, magic);
3594
3595 // RDX:RAX = magic * numerator
3596 __ imulq(numerator);
3597
3598 if (imm > 0 && magic < 0) {
3599 // RDX += numerator
3600 __ addq(rdx, numerator);
3601 } else if (imm < 0 && magic > 0) {
3602 // RDX -= numerator
3603 __ subq(rdx, numerator);
3604 }
3605
3606 // Shift if needed.
3607 if (shift != 0) {
3608 __ sarq(rdx, Immediate(shift));
3609 }
3610
3611 // RDX += 1 if RDX < 0
3612 __ movq(rax, rdx);
3613 __ shrq(rdx, Immediate(63));
3614 __ addq(rdx, rax);
3615
3616 if (instruction->IsRem()) {
3617 __ movq(rax, numerator);
3618
3619 if (IsInt<32>(imm)) {
3620 __ imulq(rdx, Immediate(static_cast<int32_t>(imm)));
3621 } else {
3622 __ imulq(rdx, codegen_->LiteralInt64Address(imm));
3623 }
3624
3625 __ subq(rax, rdx);
3626 __ movq(rdx, rax);
3627 } else {
3628 __ movq(rax, rdx);
3629 }
3630 }
3631 }
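// The code above is the standard "magic number" strength reduction (Hacker's Delight, ch. 10):
// n / imm becomes the signed high half of a widening multiply by a precomputed constant, an
// optional add/sub of the numerator, an arithmetic shift, and a +1 adjustment when the
// intermediate result is negative (the shr of the sign bit). Worked example, assuming
// CalculateMagicAndShiftForDivRem returns the usual 32-bit constants magic == 0x92492493 and
// shift == 2 for imm == 7:
//
//   n == 100:  high32(0x92492493 * 100) == -43 (signed);  -43 + 100 == 57;  57 >> 2 == 14;
//              sign bit is 0, so the quotient is 14, and the remainder path computes
//              100 - 14 * 7 == 2.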
3632
3633 void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
3634 DCHECK(instruction->IsDiv() || instruction->IsRem());
3635 DataType::Type type = instruction->GetResultType();
3636 DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
3637
3638 bool is_div = instruction->IsDiv();
3639 LocationSummary* locations = instruction->GetLocations();
3640
3641 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3642 Location second = locations->InAt(1);
3643
3644 DCHECK_EQ(RAX, locations->InAt(0).AsRegister<CpuRegister>().AsRegister());
3645 DCHECK_EQ(is_div ? RAX : RDX, out.AsRegister());
3646
3647 if (second.IsConstant()) {
3648 int64_t imm = Int64FromConstant(second.GetConstant());
3649
3650 if (imm == 0) {
3651 // Do not generate anything. DivZeroCheck would prevent any code from being executed.
3652 } else if (imm == 1 || imm == -1) {
3653 DivRemOneOrMinusOne(instruction);
3654 } else if (instruction->IsDiv() && IsPowerOfTwo(AbsOrMin(imm))) {
3655 DivByPowerOfTwo(instruction->AsDiv());
3656 } else {
3657 DCHECK(imm <= -2 || imm >= 2);
3658 GenerateDivRemWithAnyConstant(instruction);
3659 }
3660 } else {
3661 SlowPathCode* slow_path =
3662 new (codegen_->GetScopedAllocator()) DivRemMinusOneSlowPathX86_64(
3663 instruction, out.AsRegister(), type, is_div);
3664 codegen_->AddSlowPath(slow_path);
3665
3666 CpuRegister second_reg = second.AsRegister<CpuRegister>();
3667 // 0x80000000(00000000)/-1 triggers an arithmetic exception!
3668 // Dividing by -1 is actually negation and -0x80000000(00000000) = 0x80000000(00000000)
3669 // so it's safe to just use negl instead of more complex comparisons.
3670 if (type == DataType::Type::kInt32) {
3671 __ cmpl(second_reg, Immediate(-1));
3672 __ j(kEqual, slow_path->GetEntryLabel());
3673 // edx:eax <- sign extension of eax
3674 __ cdq();
3675 // eax = quotient, edx = remainder
3676 __ idivl(second_reg);
3677 } else {
3678 __ cmpq(second_reg, Immediate(-1));
3679 __ j(kEqual, slow_path->GetEntryLabel());
3680 // rdx:rax <- sign extension of rax
3681 __ cqo();
3682 // rax = quotient, rdx = remainder
3683 __ idivq(second_reg);
3684 }
3685 __ Bind(slow_path->GetExitLabel());
3686 }
3687 }
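// Java requires Integer.MIN_VALUE / -1 (and the Long equivalent) to wrap back to MIN_VALUE with
// remainder 0 instead of trapping, but idivl/idivq raise a #DE exception for that input. That
// is why a divisor of -1 is diverted to the slow path above, where the division degenerates to
// a negation (e.g. 0x80000000 / -1 == 0x80000000) and the remainder is simply 0.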
3688
3689 void LocationsBuilderX86_64::VisitDiv(HDiv* div) {
3690 LocationSummary* locations =
3691 new (GetGraph()->GetAllocator()) LocationSummary(div, LocationSummary::kNoCall);
3692 switch (div->GetResultType()) {
3693 case DataType::Type::kInt32:
3694 case DataType::Type::kInt64: {
3695 locations->SetInAt(0, Location::RegisterLocation(RAX));
3696 locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
3697 locations->SetOut(Location::SameAsFirstInput());
3698 // Intel uses edx:eax as the dividend.
3699 locations->AddTemp(Location::RegisterLocation(RDX));
3700 // We need to save the numerator while we tweak rax and rdx. As we are using imul in a way
3701 // which enforces results to be in RAX and RDX, things are simpler if we use RDX also as
3702 // output and request another temp.
3703 if (div->InputAt(1)->IsConstant()) {
3704 locations->AddTemp(Location::RequiresRegister());
3705 }
3706 break;
3707 }
3708
3709 case DataType::Type::kFloat32:
3710 case DataType::Type::kFloat64: {
3711 locations->SetInAt(0, Location::RequiresFpuRegister());
3712 locations->SetInAt(1, Location::Any());
3713 locations->SetOut(Location::SameAsFirstInput());
3714 break;
3715 }
3716
3717 default:
3718 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3719 }
3720 }
3721
3722 void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) {
3723 LocationSummary* locations = div->GetLocations();
3724 Location first = locations->InAt(0);
3725 Location second = locations->InAt(1);
3726 DCHECK(first.Equals(locations->Out()));
3727
3728 DataType::Type type = div->GetResultType();
3729 switch (type) {
3730 case DataType::Type::kInt32:
3731 case DataType::Type::kInt64: {
3732 GenerateDivRemIntegral(div);
3733 break;
3734 }
3735
3736 case DataType::Type::kFloat32: {
3737 if (second.IsFpuRegister()) {
3738 __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3739 } else if (second.IsConstant()) {
3740 __ divss(first.AsFpuRegister<XmmRegister>(),
3741 codegen_->LiteralFloatAddress(
3742 second.GetConstant()->AsFloatConstant()->GetValue()));
3743 } else {
3744 DCHECK(second.IsStackSlot());
3745 __ divss(first.AsFpuRegister<XmmRegister>(),
3746 Address(CpuRegister(RSP), second.GetStackIndex()));
3747 }
3748 break;
3749 }
3750
3751 case DataType::Type::kFloat64: {
3752 if (second.IsFpuRegister()) {
3753 __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3754 } else if (second.IsConstant()) {
3755 __ divsd(first.AsFpuRegister<XmmRegister>(),
3756 codegen_->LiteralDoubleAddress(
3757 second.GetConstant()->AsDoubleConstant()->GetValue()));
3758 } else {
3759 DCHECK(second.IsDoubleStackSlot());
3760 __ divsd(first.AsFpuRegister<XmmRegister>(),
3761 Address(CpuRegister(RSP), second.GetStackIndex()));
3762 }
3763 break;
3764 }
3765
3766 default:
3767 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3768 }
3769 }
3770
3771 void LocationsBuilderX86_64::VisitRem(HRem* rem) {
3772 DataType::Type type = rem->GetResultType();
3773 LocationSummary* locations =
3774 new (GetGraph()->GetAllocator()) LocationSummary(rem, LocationSummary::kNoCall);
3775
3776 switch (type) {
3777 case DataType::Type::kInt32:
3778 case DataType::Type::kInt64: {
3779 locations->SetInAt(0, Location::RegisterLocation(RAX));
3780 locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
3781 // Intel uses rdx:rax as the dividend and puts the remainder in rdx
3782 locations->SetOut(Location::RegisterLocation(RDX));
3783 // We need to save the numerator while we tweak eax and edx. As we are using imul in a way
3784 // which enforces results to be in RAX and RDX, things are simpler if we use EAX also as
3785 // output and request another temp.
3786 if (rem->InputAt(1)->IsConstant()) {
3787 locations->AddTemp(Location::RequiresRegister());
3788 }
3789 break;
3790 }
3791
3792 case DataType::Type::kFloat32:
3793 case DataType::Type::kFloat64: {
3794 locations->SetInAt(0, Location::Any());
3795 locations->SetInAt(1, Location::Any());
3796 locations->SetOut(Location::RequiresFpuRegister());
3797 locations->AddTemp(Location::RegisterLocation(RAX));
3798 break;
3799 }
3800
3801 default:
3802 LOG(FATAL) << "Unexpected rem type " << type;
3803 }
3804 }
3805
3806 void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) {
3807 DataType::Type type = rem->GetResultType();
3808 switch (type) {
3809 case DataType::Type::kInt32:
3810 case DataType::Type::kInt64: {
3811 GenerateDivRemIntegral(rem);
3812 break;
3813 }
3814 case DataType::Type::kFloat32:
3815 case DataType::Type::kFloat64: {
3816 GenerateRemFP(rem);
3817 break;
3818 }
3819 default:
3820 LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
3821 }
3822 }
3823
3824 void LocationsBuilderX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
3825 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
3826 locations->SetInAt(0, Location::Any());
3827 }
3828
3829 void InstructionCodeGeneratorX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
3830 SlowPathCode* slow_path =
3831 new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathX86_64(instruction);
3832 codegen_->AddSlowPath(slow_path);
3833
3834 LocationSummary* locations = instruction->GetLocations();
3835 Location value = locations->InAt(0);
3836
3837 switch (instruction->GetType()) {
3838 case DataType::Type::kBool:
3839 case DataType::Type::kUint8:
3840 case DataType::Type::kInt8:
3841 case DataType::Type::kUint16:
3842 case DataType::Type::kInt16:
3843 case DataType::Type::kInt32: {
3844 if (value.IsRegister()) {
3845 __ testl(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
3846 __ j(kEqual, slow_path->GetEntryLabel());
3847 } else if (value.IsStackSlot()) {
3848 __ cmpl(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
3849 __ j(kEqual, slow_path->GetEntryLabel());
3850 } else {
3851 DCHECK(value.IsConstant()) << value;
3852 if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
3853 __ jmp(slow_path->GetEntryLabel());
3854 }
3855 }
3856 break;
3857 }
3858 case DataType::Type::kInt64: {
3859 if (value.IsRegister()) {
3860 __ testq(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
3861 __ j(kEqual, slow_path->GetEntryLabel());
3862 } else if (value.IsDoubleStackSlot()) {
3863 __ cmpq(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
3864 __ j(kEqual, slow_path->GetEntryLabel());
3865 } else {
3866 DCHECK(value.IsConstant()) << value;
3867 if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
3868 __ jmp(slow_path->GetEntryLabel());
3869 }
3870 }
3871 break;
3872 }
3873 default:
3874 LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
3875 }
3876 }
3877
3878 void LocationsBuilderX86_64::HandleShift(HBinaryOperation* op) {
3879 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
3880
3881 LocationSummary* locations =
3882 new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall);
3883
3884 switch (op->GetResultType()) {
3885 case DataType::Type::kInt32:
3886 case DataType::Type::kInt64: {
3887 locations->SetInAt(0, Location::RequiresRegister());
3888 // The shift count needs to be in CL.
3889 locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, op->InputAt(1)));
3890 locations->SetOut(Location::SameAsFirstInput());
3891 break;
3892 }
3893 default:
3894 LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
3895 }
3896 }
3897
3898 void InstructionCodeGeneratorX86_64::HandleShift(HBinaryOperation* op) {
3899 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
3900
3901 LocationSummary* locations = op->GetLocations();
3902 CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
3903 Location second = locations->InAt(1);
3904
3905 switch (op->GetResultType()) {
3906 case DataType::Type::kInt32: {
3907 if (second.IsRegister()) {
3908 CpuRegister second_reg = second.AsRegister<CpuRegister>();
3909 if (op->IsShl()) {
3910 __ shll(first_reg, second_reg);
3911 } else if (op->IsShr()) {
3912 __ sarl(first_reg, second_reg);
3913 } else {
3914 __ shrl(first_reg, second_reg);
3915 }
3916 } else {
3917 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
3918 if (op->IsShl()) {
3919 __ shll(first_reg, imm);
3920 } else if (op->IsShr()) {
3921 __ sarl(first_reg, imm);
3922 } else {
3923 __ shrl(first_reg, imm);
3924 }
3925 }
3926 break;
3927 }
3928 case DataType::Type::kInt64: {
3929 if (second.IsRegister()) {
3930 CpuRegister second_reg = second.AsRegister<CpuRegister>();
3931 if (op->IsShl()) {
3932 __ shlq(first_reg, second_reg);
3933 } else if (op->IsShr()) {
3934 __ sarq(first_reg, second_reg);
3935 } else {
3936 __ shrq(first_reg, second_reg);
3937 }
3938 } else {
3939 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
3940 if (op->IsShl()) {
3941 __ shlq(first_reg, imm);
3942 } else if (op->IsShr()) {
3943 __ sarq(first_reg, imm);
3944 } else {
3945 __ shrq(first_reg, imm);
3946 }
3947 }
3948 break;
3949 }
3950 default:
3951 LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
3952 UNREACHABLE();
3953 }
3954 }
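// For constant shift amounts the distance is masked above with kMaxIntShiftDistance (31) or
// kMaxLongShiftDistance (63). This matches both the Java language rules and what the hardware
// does for the CL-register forms, which only look at the low 5 or 6 bits of the count.
// Illustrative example: an int shifted left by 35 is emitted as a shift by 35 & 31 == 3.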
3955
3956 void LocationsBuilderX86_64::VisitRor(HRor* ror) {
3957 LocationSummary* locations =
3958 new (GetGraph()->GetAllocator()) LocationSummary(ror, LocationSummary::kNoCall);
3959
3960 switch (ror->GetResultType()) {
3961 case DataType::Type::kInt32:
3962 case DataType::Type::kInt64: {
3963 locations->SetInAt(0, Location::RequiresRegister());
3964 // The shift count needs to be in CL (unless it is a constant).
3965 locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, ror->InputAt(1)));
3966 locations->SetOut(Location::SameAsFirstInput());
3967 break;
3968 }
3969 default:
3970 LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
3971 UNREACHABLE();
3972 }
3973 }
3974
3975 void InstructionCodeGeneratorX86_64::VisitRor(HRor* ror) {
3976 LocationSummary* locations = ror->GetLocations();
3977 CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
3978 Location second = locations->InAt(1);
3979
3980 switch (ror->GetResultType()) {
3981 case DataType::Type::kInt32:
3982 if (second.IsRegister()) {
3983 CpuRegister second_reg = second.AsRegister<CpuRegister>();
3984 __ rorl(first_reg, second_reg);
3985 } else {
3986 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
3987 __ rorl(first_reg, imm);
3988 }
3989 break;
3990 case DataType::Type::kInt64:
3991 if (second.IsRegister()) {
3992 CpuRegister second_reg = second.AsRegister<CpuRegister>();
3993 __ rorq(first_reg, second_reg);
3994 } else {
3995 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
3996 __ rorq(first_reg, imm);
3997 }
3998 break;
3999 default:
4000 LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
4001 UNREACHABLE();
4002 }
4003 }
4004
4005 void LocationsBuilderX86_64::VisitShl(HShl* shl) {
4006 HandleShift(shl);
4007 }
4008
4009 void InstructionCodeGeneratorX86_64::VisitShl(HShl* shl) {
4010 HandleShift(shl);
4011 }
4012
4013 void LocationsBuilderX86_64::VisitShr(HShr* shr) {
4014 HandleShift(shr);
4015 }
4016
4017 void InstructionCodeGeneratorX86_64::VisitShr(HShr* shr) {
4018 HandleShift(shr);
4019 }
4020
4021 void LocationsBuilderX86_64::VisitUShr(HUShr* ushr) {
4022 HandleShift(ushr);
4023 }
4024
4025 void InstructionCodeGeneratorX86_64::VisitUShr(HUShr* ushr) {
4026 HandleShift(ushr);
4027 }
4028
4029 void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
4030 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
4031 instruction, LocationSummary::kCallOnMainOnly);
4032 InvokeRuntimeCallingConvention calling_convention;
4033 if (instruction->IsStringAlloc()) {
4034 locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
4035 } else {
4036 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
4037 }
4038 locations->SetOut(Location::RegisterLocation(RAX));
4039 }
4040
4041 void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) {
4042 // Note: if heap poisoning is enabled, the entry point takes care
4043 // of poisoning the reference.
4044 if (instruction->IsStringAlloc()) {
4045 // String is allocated through StringFactory. Call NewEmptyString entry point.
4046 CpuRegister temp = instruction->GetLocations()->GetTemp(0).AsRegister<CpuRegister>();
4047 MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64PointerSize);
4048 __ gs()->movq(temp, Address::Absolute(QUICK_ENTRY_POINT(pNewEmptyString), /* no_rip */ true));
4049 __ call(Address(temp, code_offset.SizeValue()));
4050 codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
4051 } else {
4052 codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
4053 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
4054 DCHECK(!codegen_->IsLeafMethod());
4055 }
4056 }
4057
4058 void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) {
4059 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
4060 instruction, LocationSummary::kCallOnMainOnly);
4061 InvokeRuntimeCallingConvention calling_convention;
4062 locations->SetOut(Location::RegisterLocation(RAX));
4063 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
4064 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
4065 }
4066
4067 void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) {
4068 // Note: if heap poisoning is enabled, the entry point takes care
4069 // of poisoning the reference.
4070 QuickEntrypointEnum entrypoint =
4071 CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass());
4072 codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
4073 CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
4074 DCHECK(!codegen_->IsLeafMethod());
4075 }
4076
4077 void LocationsBuilderX86_64::VisitParameterValue(HParameterValue* instruction) {
4078 LocationSummary* locations =
4079 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4080 Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
4081 if (location.IsStackSlot()) {
4082 location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
4083 } else if (location.IsDoubleStackSlot()) {
4084 location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
4085 }
4086 locations->SetOut(location);
4087 }
4088
4089 void InstructionCodeGeneratorX86_64::VisitParameterValue(
4090 HParameterValue* instruction ATTRIBUTE_UNUSED) {
4091 // Nothing to do, the parameter is already at its location.
4092 }
4093
4094 void LocationsBuilderX86_64::VisitCurrentMethod(HCurrentMethod* instruction) {
4095 LocationSummary* locations =
4096 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4097 locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument));
4098 }
4099
4100 void InstructionCodeGeneratorX86_64::VisitCurrentMethod(
4101 HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
4102 // Nothing to do, the method is already at its location.
4103 }
4104
4105 void LocationsBuilderX86_64::VisitClassTableGet(HClassTableGet* instruction) {
4106 LocationSummary* locations =
4107 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4108 locations->SetInAt(0, Location::RequiresRegister());
4109 locations->SetOut(Location::RequiresRegister());
4110 }
4111
4112 void InstructionCodeGeneratorX86_64::VisitClassTableGet(HClassTableGet* instruction) {
4113 LocationSummary* locations = instruction->GetLocations();
4114 if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
4115 uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
4116 instruction->GetIndex(), kX86_64PointerSize).SizeValue();
4117 __ movq(locations->Out().AsRegister<CpuRegister>(),
4118 Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset));
4119 } else {
4120 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
4121 instruction->GetIndex(), kX86_64PointerSize));
4122 __ movq(locations->Out().AsRegister<CpuRegister>(),
4123 Address(locations->InAt(0).AsRegister<CpuRegister>(),
4124 mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
4125 __ movq(locations->Out().AsRegister<CpuRegister>(),
4126 Address(locations->Out().AsRegister<CpuRegister>(), method_offset));
4127 }
4128 }
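// The kVTable path above needs a single load because the embedded vtable lives inside the
// Class object itself, so the ArtMethod* sits at a fixed offset from the class pointer. The IMT
// path needs two loads: the Class only stores a pointer to its ImTable. Illustrative shape of
// the emitted code (register names and offset labels are placeholders):
//
//   movq imt_ptr_offset(%rdi), %rax     // rax = class->imt_
//   movq element_offset(%rax), %rax     // rax = imt_[instruction->GetIndex()]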
4129
4130 void LocationsBuilderX86_64::VisitNot(HNot* not_) {
4131 LocationSummary* locations =
4132 new (GetGraph()->GetAllocator()) LocationSummary(not_, LocationSummary::kNoCall);
4133 locations->SetInAt(0, Location::RequiresRegister());
4134 locations->SetOut(Location::SameAsFirstInput());
4135 }
4136
4137 void InstructionCodeGeneratorX86_64::VisitNot(HNot* not_) {
4138 LocationSummary* locations = not_->GetLocations();
4139 DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
4140 locations->Out().AsRegister<CpuRegister>().AsRegister());
4141 Location out = locations->Out();
4142 switch (not_->GetResultType()) {
4143 case DataType::Type::kInt32:
4144 __ notl(out.AsRegister<CpuRegister>());
4145 break;
4146
4147 case DataType::Type::kInt64:
4148 __ notq(out.AsRegister<CpuRegister>());
4149 break;
4150
4151 default:
4152 LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
4153 }
4154 }
4155
4156 void LocationsBuilderX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
4157 LocationSummary* locations =
4158 new (GetGraph()->GetAllocator()) LocationSummary(bool_not, LocationSummary::kNoCall);
4159 locations->SetInAt(0, Location::RequiresRegister());
4160 locations->SetOut(Location::SameAsFirstInput());
4161 }
4162
4163 void InstructionCodeGeneratorX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
4164 LocationSummary* locations = bool_not->GetLocations();
4165 DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
4166 locations->Out().AsRegister<CpuRegister>().AsRegister());
4167 Location out = locations->Out();
4168 __ xorl(out.AsRegister<CpuRegister>(), Immediate(1));
4169 }
4170
4171 void LocationsBuilderX86_64::VisitPhi(HPhi* instruction) {
4172 LocationSummary* locations =
4173 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4174 for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
4175 locations->SetInAt(i, Location::Any());
4176 }
4177 locations->SetOut(Location::Any());
4178 }
4179
4180 void InstructionCodeGeneratorX86_64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
4181 LOG(FATAL) << "Unimplemented";
4182 }
4183
4184 void CodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) {
4185 /*
4186 * According to the JSR-133 Cookbook, for x86-64 only StoreLoad/AnyAny barriers need a memory fence.
4187 * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86-64 memory model.
4188 * For those cases, all we need to ensure is that there is a scheduling barrier in place.
4189 */
4190 switch (kind) {
4191 case MemBarrierKind::kAnyAny: {
4192 MemoryFence();
4193 break;
4194 }
4195 case MemBarrierKind::kAnyStore:
4196 case MemBarrierKind::kLoadAny:
4197 case MemBarrierKind::kStoreStore: {
4198 // nop
4199 break;
4200 }
4201 case MemBarrierKind::kNTStoreStore:
4202 // Non-Temporal Store/Store needs an explicit fence.
4203 MemoryFence(/* non-temporal */ true);
4204 break;
4205 }
4206 }
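// Illustrative mapping, assuming the usual lowering in this backend: kAnyAny expands to
// MemoryFence(), i.e. a full StoreLoad fence (an mfence or an equivalent locked RMW), and
// kNTStoreStore emits an explicit fence because non-temporal stores are weakly ordered. The
// remaining kinds emit no instructions at all, since x86-64's TSO-like model already forbids
// LoadLoad, LoadStore and StoreStore reordering; they only constrain instruction scheduling.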
4207
4208 void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) {
4209 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
4210
4211 bool object_field_get_with_read_barrier =
4212 kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
4213 LocationSummary* locations =
4214 new (GetGraph()->GetAllocator()) LocationSummary(instruction,
4215 object_field_get_with_read_barrier
4216 ? LocationSummary::kCallOnSlowPath
4217 : LocationSummary::kNoCall);
4218 if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
4219 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
4220 }
4221 locations->SetInAt(0, Location::RequiresRegister());
4222 if (DataType::IsFloatingPointType(instruction->GetType())) {
4223 locations->SetOut(Location::RequiresFpuRegister());
4224 } else {
4225 // The output overlaps for an object field get when read barriers
4226 // are enabled: we do not want the move to overwrite the object's
4227 // location, as we need it to emit the read barrier.
4228 locations->SetOut(
4229 Location::RequiresRegister(),
4230 object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
4231 }
4232 }
4233
4234 void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
4235 const FieldInfo& field_info) {
4236 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
4237
4238 LocationSummary* locations = instruction->GetLocations();
4239 Location base_loc = locations->InAt(0);
4240 CpuRegister base = base_loc.AsRegister<CpuRegister>();
4241 Location out = locations->Out();
4242 bool is_volatile = field_info.IsVolatile();
4243 DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
4244 DataType::Type load_type = instruction->GetType();
4245 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
4246
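// Each case below uses a load of matching width and signedness (e.g. movzxb for
// Bool/Uint8, movsxb for Int8); references additionally go through the read barrier
// machinery when read barriers are enabled.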
4247 switch (load_type) {
4248 case DataType::Type::kBool:
4249 case DataType::Type::kUint8: {
4250 __ movzxb(out.AsRegister<CpuRegister>(), Address(base, offset));
4251 break;
4252 }
4253
4254 case DataType::Type::kInt8: {
4255 __ movsxb(out.AsRegister<CpuRegister>(), Address(base, offset));
4256 break;
4257 }
4258
4259 case DataType::Type::kUint16: {
4260 __ movzxw(out.AsRegister<CpuRegister>(), Address(base, offset));
4261 break;
4262 }
4263
4264 case DataType::Type::kInt16: {
4265 __ movsxw(out.AsRegister<CpuRegister>(), Address(base, offset));
4266 break;
4267 }
4268
4269 case DataType::Type::kInt32: {
4270 __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
4271 break;
4272 }
4273
4274 case DataType::Type::kReference: {
4275 // /* HeapReference<Object> */ out = *(base + offset)
4276 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
4277 // Note that a potential implicit null check is handled in this
4278 // CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call.
4279 codegen_->GenerateFieldLoadWithBakerReadBarrier(
4280 instruction, out, base, offset, /* needs_null_check */ true);
4281 if (is_volatile) {
4282 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4283 }
4284 } else {
4285 __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
4286 codegen_->MaybeRecordImplicitNullCheck(instruction);
4287 if (is_volatile) {
4288 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4289 }
4290 // If read barriers are enabled, emit read barriers other than
4291 // Baker's using a slow path (and also unpoison the loaded
4292 // reference, if heap poisoning is enabled).
4293 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
4294 }
4295 break;
4296 }
4297
4298 case DataType::Type::kInt64: {
4299 __ movq(out.AsRegister<CpuRegister>(), Address(base, offset));
4300 break;
4301 }
4302
4303 case DataType::Type::kFloat32: {
4304 __ movss(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
4305 break;
4306 }
4307
4308 case DataType::Type::kFloat64: {
4309 __ movsd(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
4310 break;
4311 }
4312
4313 case DataType::Type::kUint32:
4314 case DataType::Type::kUint64:
4315 case DataType::Type::kVoid:
4316 LOG(FATAL) << "Unreachable type " << load_type;
4317 UNREACHABLE();
4318 }
4319
4320 if (load_type == DataType::Type::kReference) {
4321 // Potential implicit null checks, in the case of reference
4322 // fields, are handled in the previous switch statement.
4323 } else {
4324 codegen_->MaybeRecordImplicitNullCheck(instruction);
4325 }
4326
4327 if (is_volatile) {
4328 if (load_type == DataType::Type::kReference) {
4329 // Memory barriers, in the case of references, are also handled
4330 // in the previous switch statement.
4331 } else {
4332 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4333 }
4334 }
4335 }
4336
4337 void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction,
4338 const FieldInfo& field_info) {
4339 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
4340
4341 LocationSummary* locations =
4342 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4343 DataType::Type field_type = field_info.GetFieldType();
4344 bool is_volatile = field_info.IsVolatile();
4345 bool needs_write_barrier =
4346 CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
4347
4348 locations->SetInAt(0, Location::RequiresRegister());
4349 if (DataType::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
4350 if (is_volatile) {
4351 // In order to satisfy the semantics of volatile, this must be a single instruction store.
4352 locations->SetInAt(1, Location::FpuRegisterOrInt32Constant(instruction->InputAt(1)));
4353 } else {
4354 locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
4355 }
4356 } else {
4357 if (is_volatile) {
4358 // In order to satisfy the semantics of volatile, this must be a single instruction store.
4359 locations->SetInAt(1, Location::RegisterOrInt32Constant(instruction->InputAt(1)));
4360 } else {
4361 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
4362 }
4363 }
4364 if (needs_write_barrier) {
4365 // Temporary registers for the write barrier.
4366 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
4367 locations->AddTemp(Location::RequiresRegister());
4368 } else if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
4369 // Temporary register for the reference poisoning.
4370 locations->AddTemp(Location::RequiresRegister());
4371 }
4372 }
4373
4374 void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
4375 const FieldInfo& field_info,
4376 bool value_can_be_null) {
4377 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
4378
4379 LocationSummary* locations = instruction->GetLocations();
4380 CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>();
4381 Location value = locations->InAt(1);
4382 bool is_volatile = field_info.IsVolatile();
4383 DataType::Type field_type = field_info.GetFieldType();
4384 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
4385
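// Per the JSR-133 Cookbook, a volatile store needs an AnyStore barrier before it and an
// AnyAny barrier after it; on x86-64 only the trailing AnyAny emits an actual fence
// (see GenerateMemoryBarrier above).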
4386 if (is_volatile) {
4387 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
4388 }
4389
4390 bool maybe_record_implicit_null_check_done = false;
4391
4392 switch (field_type) {
4393 case DataType::Type::kBool:
4394 case DataType::Type::kUint8:
4395 case DataType::Type::kInt8: {
4396 if (value.IsConstant()) {
4397 __ movb(Address(base, offset),
4398 Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
4399 } else {
4400 __ movb(Address(base, offset), value.AsRegister<CpuRegister>());
4401 }
4402 break;
4403 }
4404
4405 case DataType::Type::kUint16:
4406 case DataType::Type::kInt16: {
4407 if (value.IsConstant()) {
4408 __ movw(Address(base, offset),
4409 Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
4410 } else {
4411 __ movw(Address(base, offset), value.AsRegister<CpuRegister>());
4412 }
4413 break;
4414 }
4415
4416 case DataType::Type::kInt32:
4417 case DataType::Type::kReference: {
4418 if (value.IsConstant()) {
4419 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
4420 // `field_type == DataType::Type::kReference` implies `v == 0`.
4421 DCHECK((field_type != DataType::Type::kReference) || (v == 0));
4422 // Note: if heap poisoning is enabled, no need to poison
4423 // (negate) `v` if it is a reference, as it would be null.
4424 __ movl(Address(base, offset), Immediate(v));
4425 } else {
4426 if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
4427 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
4428 __ movl(temp, value.AsRegister<CpuRegister>());
4429 __ PoisonHeapReference(temp);
4430 __ movl(Address(base, offset), temp);
4431 } else {
4432 __ movl(Address(base, offset), value.AsRegister<CpuRegister>());
4433 }
4434 }
4435 break;
4436 }
4437
4438 case DataType::Type::kInt64: {
4439 if (value.IsConstant()) {
4440 int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
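// Note: MoveInt64ToAddress stores the constant either as one sign-extended 32-bit
// immediate or as two 32-bit halves, and records the implicit null check itself,
// hence the flag below (see CodeGeneratorX86_64::MoveInt64ToAddress()).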
4441 codegen_->MoveInt64ToAddress(Address(base, offset),
4442 Address(base, offset + sizeof(int32_t)),
4443 v,
4444 instruction);
4445 maybe_record_implicit_null_check_done = true;
4446 } else {
4447 __ movq(Address(base, offset), value.AsRegister<CpuRegister>());
4448 }
4449 break;
4450 }
4451
4452 case DataType::Type::kFloat32: {
4453 if (value.IsConstant()) {
4454 int32_t v =
4455 bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
4456 __ movl(Address(base, offset), Immediate(v));
4457 } else {
4458 __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>());
4459 }
4460 break;
4461 }
4462
4463 case DataType::Type::kFloat64: {
4464 if (value.IsConstant()) {
4465 int64_t v =
4466 bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
4467 codegen_->MoveInt64ToAddress(Address(base, offset),
4468 Address(base, offset + sizeof(int32_t)),
4469 v,
4470 instruction);
4471 maybe_record_implicit_null_check_done = true;
4472 } else {
4473 __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>());
4474 }
4475 break;
4476 }
4477
4478 case DataType::Type::kUint32:
4479 case DataType::Type::kUint64:
4480 case DataType::Type::kVoid:
4481 LOG(FATAL) << "Unreachable type " << field_type;
4482 UNREACHABLE();
4483 }
4484
4485 if (!maybe_record_implicit_null_check_done) {
4486 codegen_->MaybeRecordImplicitNullCheck(instruction);
4487 }
4488
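// A reference was stored into the heap, so dirty the corresponding card; the GC uses
// dirty cards to find objects holding cross-space references. Skipped entirely when no
// write barrier is required for this store.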
4489 if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
4490 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
4491 CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
4492 codegen_->MarkGCCard(temp, card, base, value.AsRegister<CpuRegister>(), value_can_be_null);
4493 }
4494
4495 if (is_volatile) {
4496 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
4497 }
4498 }
4499
4500 void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
4501 HandleFieldSet(instruction, instruction->GetFieldInfo());
4502 }
4503
4504 void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
4505 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
4506 }
4507
4508 void LocationsBuilderX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
4509 HandleFieldGet(instruction);
4510 }
4511
4512 void InstructionCodeGeneratorX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
4513 HandleFieldGet(instruction, instruction->GetFieldInfo());
4514 }
4515
4516 void LocationsBuilderX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
4517 HandleFieldGet(instruction);
4518 }
4519
4520 void InstructionCodeGeneratorX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
4521 HandleFieldGet(instruction, instruction->GetFieldInfo());
4522 }
4523
4524 void LocationsBuilderX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
4525 HandleFieldSet(instruction, instruction->GetFieldInfo());
4526 }
4527
4528 void InstructionCodeGeneratorX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
4529 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
4530 }
4531
4532 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldGet(
4533 HUnresolvedInstanceFieldGet* instruction) {
4534 FieldAccessCallingConventionX86_64 calling_convention;
4535 codegen_->CreateUnresolvedFieldLocationSummary(
4536 instruction, instruction->GetFieldType(), calling_convention);
4537 }
4538
4539 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldGet(
4540 HUnresolvedInstanceFieldGet* instruction) {
4541 FieldAccessCallingConventionX86_64 calling_convention;
4542 codegen_->GenerateUnresolvedFieldAccess(instruction,
4543 instruction->GetFieldType(),
4544 instruction->GetFieldIndex(),
4545 instruction->GetDexPc(),
4546 calling_convention);
4547 }
4548
4549 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldSet(
4550 HUnresolvedInstanceFieldSet* instruction) {
4551 FieldAccessCallingConventionX86_64 calling_convention;
4552 codegen_->CreateUnresolvedFieldLocationSummary(
4553 instruction, instruction->GetFieldType(), calling_convention);
4554 }
4555
4556 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldSet(
4557 HUnresolvedInstanceFieldSet* instruction) {
4558 FieldAccessCallingConventionX86_64 calling_convention;
4559 codegen_->GenerateUnresolvedFieldAccess(instruction,
4560 instruction->GetFieldType(),
4561 instruction->GetFieldIndex(),
4562 instruction->GetDexPc(),
4563 calling_convention);
4564 }
4565
4566 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldGet(
4567 HUnresolvedStaticFieldGet* instruction) {
4568 FieldAccessCallingConventionX86_64 calling_convention;
4569 codegen_->CreateUnresolvedFieldLocationSummary(
4570 instruction, instruction->GetFieldType(), calling_convention);
4571 }
4572
4573 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldGet(
4574 HUnresolvedStaticFieldGet* instruction) {
4575 FieldAccessCallingConventionX86_64 calling_convention;
4576 codegen_->GenerateUnresolvedFieldAccess(instruction,
4577 instruction->GetFieldType(),
4578 instruction->GetFieldIndex(),
4579 instruction->GetDexPc(),
4580 calling_convention);
4581 }
4582
4583 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldSet(
4584 HUnresolvedStaticFieldSet* instruction) {
4585 FieldAccessCallingConventionX86_64 calling_convention;
4586 codegen_->CreateUnresolvedFieldLocationSummary(
4587 instruction, instruction->GetFieldType(), calling_convention);
4588 }
4589
4590 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldSet(
4591 HUnresolvedStaticFieldSet* instruction) {
4592 FieldAccessCallingConventionX86_64 calling_convention;
4593 codegen_->GenerateUnresolvedFieldAccess(instruction,
4594 instruction->GetFieldType(),
4595 instruction->GetFieldIndex(),
4596 instruction->GetDexPc(),
4597 calling_convention);
4598 }
4599
4600 void LocationsBuilderX86_64::VisitNullCheck(HNullCheck* instruction) {
4601 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
4602 Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks()
4603 ? Location::RequiresRegister()
4604 : Location::Any();
4605 locations->SetInAt(0, loc);
4606 }
4607
4608 void CodeGeneratorX86_64::GenerateImplicitNullCheck(HNullCheck* instruction) {
4609 if (CanMoveNullCheckToUser(instruction)) {
4610 return;
4611 }
4612 LocationSummary* locations = instruction->GetLocations();
4613 Location obj = locations->InAt(0);
4614
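// Implicit null check: a memory access at offset 0 faults if `obj` is null, and the
// fault handler turns the SIGSEGV into a NullPointerException. testl is used because it
// reads memory without clobbering a register (only flags are written).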
4615 __ testl(CpuRegister(RAX), Address(obj.AsRegister<CpuRegister>(), 0));
4616 RecordPcInfo(instruction, instruction->GetDexPc());
4617 }
4618
4619 void CodeGeneratorX86_64::GenerateExplicitNullCheck(HNullCheck* instruction) {
4620 SlowPathCode* slow_path = new (GetScopedAllocator()) NullCheckSlowPathX86_64(instruction);
4621 AddSlowPath(slow_path);
4622
4623 LocationSummary* locations = instruction->GetLocations();
4624 Location obj = locations->InAt(0);
4625
4626 if (obj.IsRegister()) {
4627 __ testl(obj.AsRegister<CpuRegister>(), obj.AsRegister<CpuRegister>());
4628 } else if (obj.IsStackSlot()) {
4629 __ cmpl(Address(CpuRegister(RSP), obj.GetStackIndex()), Immediate(0));
4630 } else {
4631 DCHECK(obj.IsConstant()) << obj;
4632 DCHECK(obj.GetConstant()->IsNullConstant());
4633 __ jmp(slow_path->GetEntryLabel());
4634 return;
4635 }
4636 __ j(kEqual, slow_path->GetEntryLabel());
4637 }
4638
4639 void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) {
4640 codegen_->GenerateNullCheck(instruction);
4641 }
4642
4643 void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) {
4644 bool object_array_get_with_read_barrier =
4645 kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
4646 LocationSummary* locations =
4647 new (GetGraph()->GetAllocator()) LocationSummary(instruction,
4648 object_array_get_with_read_barrier
4649 ? LocationSummary::kCallOnSlowPath
4650 : LocationSummary::kNoCall);
4651 if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
4652 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
4653 }
4654 locations->SetInAt(0, Location::RequiresRegister());
4655 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
4656 if (DataType::IsFloatingPointType(instruction->GetType())) {
4657 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4658 } else {
4659 // The output overlaps for an object array get when read barriers
4660 // are enabled: we do not want the move to overwrite the array's
4661 // location, as we need it to emit the read barrier.
4662 locations->SetOut(
4663 Location::RequiresRegister(),
4664 object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
4665 }
4666 }
4667
4668 void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
4669 LocationSummary* locations = instruction->GetLocations();
4670 Location obj_loc = locations->InAt(0);
4671 CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
4672 Location index = locations->InAt(1);
4673 Location out_loc = locations->Out();
4674 uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
4675
4676 DataType::Type type = instruction->GetType();
4677 switch (type) {
4678 case DataType::Type::kBool:
4679 case DataType::Type::kUint8: {
4680 CpuRegister out = out_loc.AsRegister<CpuRegister>();
4681 __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
4682 break;
4683 }
4684
4685 case DataType::Type::kInt8: {
4686 CpuRegister out = out_loc.AsRegister<CpuRegister>();
4687 __ movsxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
4688 break;
4689 }
4690
4691 case DataType::Type::kUint16: {
4692 CpuRegister out = out_loc.AsRegister<CpuRegister>();
4693 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
4694 // Branch to the compressed or uncompressed load depending on the string's compression flag.
4695 uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
4696 NearLabel done, not_compressed;
4697 __ testb(Address(obj, count_offset), Immediate(1));
4698 codegen_->MaybeRecordImplicitNullCheck(instruction);
4699 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
4700 "Expecting 0=compressed, 1=uncompressed");
4701 __ j(kNotZero, &not_compressed);
4702 __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
4703 __ jmp(&done);
4704 __ Bind(&not_compressed);
4705 __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
4706 __ Bind(&done);
4707 } else {
4708 __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
4709 }
4710 break;
4711 }
4712
4713 case DataType::Type::kInt16: {
4714 CpuRegister out = out_loc.AsRegister<CpuRegister>();
4715 __ movsxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
4716 break;
4717 }
4718
4719 case DataType::Type::kInt32: {
4720 CpuRegister out = out_loc.AsRegister<CpuRegister>();
4721 __ movl(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
4722 break;
4723 }
4724
4725 case DataType::Type::kReference: {
4726 static_assert(
4727 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
4728 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
4729 // /* HeapReference<Object> */ out =
4730 // *(obj + data_offset + index * sizeof(HeapReference<Object>))
4731 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
4732 // Note that a potential implicit null check is handled in this
4733 // CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call.
4734 codegen_->GenerateArrayLoadWithBakerReadBarrier(
4735 instruction, out_loc, obj, data_offset, index, /* needs_null_check */ true);
4736 } else {
4737 CpuRegister out = out_loc.AsRegister<CpuRegister>();
4738 __ movl(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
4739 codegen_->MaybeRecordImplicitNullCheck(instruction);
4740 // If read barriers are enabled, emit read barriers other than
4741 // Baker's using a slow path (and also unpoison the loaded
4742 // reference, if heap poisoning is enabled).
4743 if (index.IsConstant()) {
4744 uint32_t offset =
4745 (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
4746 codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
4747 } else {
4748 codegen_->MaybeGenerateReadBarrierSlow(
4749 instruction, out_loc, out_loc, obj_loc, data_offset, index);
4750 }
4751 }
4752 break;
4753 }
4754
4755 case DataType::Type::kInt64: {
4756 CpuRegister out = out_loc.AsRegister<CpuRegister>();
4757 __ movq(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_8, data_offset));
4758 break;
4759 }
4760
4761 case DataType::Type::kFloat32: {
4762 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
4763 __ movss(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
4764 break;
4765 }
4766
4767 case DataType::Type::kFloat64: {
4768 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
4769 __ movsd(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_8, data_offset));
4770 break;
4771 }
4772
4773 case DataType::Type::kUint32:
4774 case DataType::Type::kUint64:
4775 case DataType::Type::kVoid:
4776 LOG(FATAL) << "Unreachable type " << type;
4777 UNREACHABLE();
4778 }
4779
4780 if (type == DataType::Type::kReference) {
4781 // Potential implicit null checks, in the case of reference
4782 // arrays, are handled in the previous switch statement.
4783 } else {
4784 codegen_->MaybeRecordImplicitNullCheck(instruction);
4785 }
4786 }
4787
4788 void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) {
4789 DataType::Type value_type = instruction->GetComponentType();
4790
4791 bool needs_write_barrier =
4792 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
4793 bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
4794
4795 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
4796 instruction,
4797 may_need_runtime_call_for_type_check ?
4798 LocationSummary::kCallOnSlowPath :
4799 LocationSummary::kNoCall);
4800
4801 locations->SetInAt(0, Location::RequiresRegister());
4802 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
4803 if (DataType::IsFloatingPointType(value_type)) {
4804 locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
4805 } else {
4806 locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
4807 }
4808
4809 if (needs_write_barrier) {
4810 // Temporary registers for the write barrier.
4811 locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too.
4812 locations->AddTemp(Location::RequiresRegister());
4813 }
4814 }
4815
4816 void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
4817 LocationSummary* locations = instruction->GetLocations();
4818 Location array_loc = locations->InAt(0);
4819 CpuRegister array = array_loc.AsRegister<CpuRegister>();
4820 Location index = locations->InAt(1);
4821 Location value = locations->InAt(2);
4822 DataType::Type value_type = instruction->GetComponentType();
4823 bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
4824 bool needs_write_barrier =
4825 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
4826 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
4827 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
4828 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
4829
4830 switch (value_type) {
4831 case DataType::Type::kBool:
4832 case DataType::Type::kUint8:
4833 case DataType::Type::kInt8: {
4834 uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
4835 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_1, offset);
4836 if (value.IsRegister()) {
4837 __ movb(address, value.AsRegister<CpuRegister>());
4838 } else {
4839 __ movb(address, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
4840 }
4841 codegen_->MaybeRecordImplicitNullCheck(instruction);
4842 break;
4843 }
4844
4845 case DataType::Type::kUint16:
4846 case DataType::Type::kInt16: {
4847 uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
4848 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_2, offset);
4849 if (value.IsRegister()) {
4850 __ movw(address, value.AsRegister<CpuRegister>());
4851 } else {
4852 DCHECK(value.IsConstant()) << value;
4853 __ movw(address, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
4854 }
4855 codegen_->MaybeRecordImplicitNullCheck(instruction);
4856 break;
4857 }
4858
4859 case DataType::Type::kReference: {
4860 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
4861 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
4862
4863 if (!value.IsRegister()) {
4864 // Just setting null.
4865 DCHECK(instruction->InputAt(2)->IsNullConstant());
4866 DCHECK(value.IsConstant()) << value;
4867 __ movl(address, Immediate(0));
4868 codegen_->MaybeRecordImplicitNullCheck(instruction);
4869 DCHECK(!needs_write_barrier);
4870 DCHECK(!may_need_runtime_call_for_type_check);
4871 break;
4872 }
4873
4874 DCHECK(needs_write_barrier);
4875 CpuRegister register_value = value.AsRegister<CpuRegister>();
4876 // We cannot use a NearLabel for `done`, as its range may be too
4877 // short when Baker read barriers are enabled.
4878 Label done;
4879 NearLabel not_null, do_put;
4880 SlowPathCode* slow_path = nullptr;
4881 Location temp_loc = locations->GetTemp(0);
4882 CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
4883 if (may_need_runtime_call_for_type_check) {
4884 slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathX86_64(instruction);
4885 codegen_->AddSlowPath(slow_path);
4886 if (instruction->GetValueCanBeNull()) {
4887 __ testl(register_value, register_value);
4888 __ j(kNotEqual, &not_null);
4889 __ movl(address, Immediate(0));
4890 codegen_->MaybeRecordImplicitNullCheck(instruction);
4891 __ jmp(&done);
4892 __ Bind(&not_null);
4893 }
4894
4895 // Note that when Baker read barriers are enabled, the type
4896 // checks are performed without read barriers. This is fine,
4897 // even in the case where a class object is in the from-space
4898 // after the flip, as a comparison involving such a type would
4899 // not produce a false positive; it may of course produce a
4900 // false negative, in which case we would take the ArraySet
4901 // slow path.
4902
4903 // /* HeapReference<Class> */ temp = array->klass_
4904 __ movl(temp, Address(array, class_offset));
4905 codegen_->MaybeRecordImplicitNullCheck(instruction);
4906 __ MaybeUnpoisonHeapReference(temp);
4907
4908 // /* HeapReference<Class> */ temp = temp->component_type_
4909 __ movl(temp, Address(temp, component_offset));
4910 // If heap poisoning is enabled, no need to unpoison `temp`
4911 // nor the object reference in `register_value->klass`, as
4912 // we are comparing two poisoned references.
4913 __ cmpl(temp, Address(register_value, class_offset));
4914
4915 if (instruction->StaticTypeOfArrayIsObjectArray()) {
4916 __ j(kEqual, &do_put);
4917 // If heap poisoning is enabled, the `temp` reference has
4918 // not been unpoisoned yet; unpoison it now.
4919 __ MaybeUnpoisonHeapReference(temp);
4920
4921 // If heap poisoning is enabled, no need to unpoison the
4922 // heap reference loaded below, as it is only used for a
4923 // comparison with null.
4924 __ cmpl(Address(temp, super_offset), Immediate(0));
4925 __ j(kNotEqual, slow_path->GetEntryLabel());
4926 __ Bind(&do_put);
4927 } else {
4928 __ j(kNotEqual, slow_path->GetEntryLabel());
4929 }
4930 }
4931
4932 if (kPoisonHeapReferences) {
4933 __ movl(temp, register_value);
4934 __ PoisonHeapReference(temp);
4935 __ movl(address, temp);
4936 } else {
4937 __ movl(address, register_value);
4938 }
4939 if (!may_need_runtime_call_for_type_check) {
4940 codegen_->MaybeRecordImplicitNullCheck(instruction);
4941 }
4942
4943 CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
4944 codegen_->MarkGCCard(
4945 temp, card, array, value.AsRegister<CpuRegister>(), instruction->GetValueCanBeNull());
4946 __ Bind(&done);
4947
4948 if (slow_path != nullptr) {
4949 __ Bind(slow_path->GetExitLabel());
4950 }
4951
4952 break;
4953 }
4954
4955 case DataType::Type::kInt32: {
4956 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
4957 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
4958 if (value.IsRegister()) {
4959 __ movl(address, value.AsRegister<CpuRegister>());
4960 } else {
4961 DCHECK(value.IsConstant()) << value;
4962 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
4963 __ movl(address, Immediate(v));
4964 }
4965 codegen_->MaybeRecordImplicitNullCheck(instruction);
4966 break;
4967 }
4968
4969 case DataType::Type::kInt64: {
4970 uint32_t offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
4971 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset);
4972 if (value.IsRegister()) {
4973 __ movq(address, value.AsRegister<CpuRegister>());
4974 codegen_->MaybeRecordImplicitNullCheck(instruction);
4975 } else {
4976 int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
4977 Address address_high =
4978 CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t));
4979 codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
4980 }
4981 break;
4982 }
4983
4984 case DataType::Type::kFloat32: {
4985 uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
4986 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
4987 if (value.IsFpuRegister()) {
4988 __ movss(address, value.AsFpuRegister<XmmRegister>());
4989 } else {
4990 DCHECK(value.IsConstant());
4991 int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
4992 __ movl(address, Immediate(v));
4993 }
4994 codegen_->MaybeRecordImplicitNullCheck(instruction);
4995 break;
4996 }
4997
4998 case DataType::Type::kFloat64: {
4999 uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
5000 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset);
5001 if (value.IsFpuRegister()) {
5002 __ movsd(address, value.AsFpuRegister<XmmRegister>());
5003 codegen_->MaybeRecordImplicitNullCheck(instruction);
5004 } else {
5005 int64_t v =
5006 bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
5007 Address address_high =
5008 CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t));
5009 codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
5010 }
5011 break;
5012 }
5013
5014 case DataType::Type::kUint32:
5015 case DataType::Type::kUint64:
5016 case DataType::Type::kVoid:
5017 LOG(FATAL) << "Unreachable type " << instruction->GetType();
5018 UNREACHABLE();
5019 }
5020 }
5021
5022 void LocationsBuilderX86_64::VisitArrayLength(HArrayLength* instruction) {
5023 LocationSummary* locations =
5024 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5025 locations->SetInAt(0, Location::RequiresRegister());
5026 if (!instruction->IsEmittedAtUseSite()) {
5027 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5028 }
5029 }
5030
5031 void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction) {
5032 if (instruction->IsEmittedAtUseSite()) {
5033 return;
5034 }
5035
5036 LocationSummary* locations = instruction->GetLocations();
5037 uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
5038 CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
5039 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
5040 __ movl(out, Address(obj, offset));
5041 codegen_->MaybeRecordImplicitNullCheck(instruction);
5042 // For a String length, shift out the compression flag stored in the least significant bit of the count field.
5043 if (mirror::kUseStringCompression && instruction->IsStringLength()) {
5044 __ shrl(out, Immediate(1));
5045 }
5046 }
5047
5048 void LocationsBuilderX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
5049 RegisterSet caller_saves = RegisterSet::Empty();
5050 InvokeRuntimeCallingConvention calling_convention;
5051 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
5052 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
5053 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
5054 locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
5055 HInstruction* length = instruction->InputAt(1);
5056 if (!length->IsEmittedAtUseSite()) {
5057 locations->SetInAt(1, Location::RegisterOrConstant(length));
5058 }
5059 }
5060
5061 void InstructionCodeGeneratorX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
5062 LocationSummary* locations = instruction->GetLocations();
5063 Location index_loc = locations->InAt(0);
5064 Location length_loc = locations->InAt(1);
5065 SlowPathCode* slow_path =
5066 new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathX86_64(instruction);
5067
5068 if (length_loc.IsConstant()) {
5069 int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
5070 if (index_loc.IsConstant()) {
5071 // BCE will remove the bounds check if we are guaranteed to pass.
5072 int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
5073 if (index < 0 || index >= length) {
5074 codegen_->AddSlowPath(slow_path);
5075 __ jmp(slow_path->GetEntryLabel());
5076 } else {
5077 // Some optimization after BCE may have generated this, and we should not
5078 // generate a bounds check if it is a valid range.
5079 }
5080 return;
5081 }
5082
5083 // We have to reverse the jump condition because the length is the constant.
5084 CpuRegister index_reg = index_loc.AsRegister<CpuRegister>();
5085 __ cmpl(index_reg, Immediate(length));
5086 codegen_->AddSlowPath(slow_path);
5087 __ j(kAboveEqual, slow_path->GetEntryLabel());
5088 } else {
5089 HInstruction* array_length = instruction->InputAt(1);
5090 if (array_length->IsEmittedAtUseSite()) {
5091 // Address the length field in the array.
5092 DCHECK(array_length->IsArrayLength());
5093 uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
5094 Location array_loc = array_length->GetLocations()->InAt(0);
5095 Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
5096 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
5097 // TODO: if index_loc.IsConstant(), compare twice the index (to compensate for
5098 // the string compression flag) with the in-memory length and avoid the temporary.
5099 CpuRegister length_reg = CpuRegister(TMP);
5100 __ movl(length_reg, array_len);
5101 codegen_->MaybeRecordImplicitNullCheck(array_length);
5102 __ shrl(length_reg, Immediate(1));
5103 codegen_->GenerateIntCompare(length_reg, index_loc);
5104 } else {
5105 // Checking the bound for general case:
5106 // Array of char, or String's array when the compression feature is off.
5107 if (index_loc.IsConstant()) {
5108 int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
5109 __ cmpl(array_len, Immediate(value));
5110 } else {
5111 __ cmpl(array_len, index_loc.AsRegister<CpuRegister>());
5112 }
5113 codegen_->MaybeRecordImplicitNullCheck(array_length);
5114 }
5115 } else {
5116 codegen_->GenerateIntCompare(length_loc, index_loc);
5117 }
5118 codegen_->AddSlowPath(slow_path);
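// The compare above is cmpl(length, index), so an unsigned below-or-equal catches both
// index >= length and negative indices (which appear as large unsigned values).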
5119 __ j(kBelowEqual, slow_path->GetEntryLabel());
5120 }
5121 }
5122
5123 void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp,
5124 CpuRegister card,
5125 CpuRegister object,
5126 CpuRegister value,
5127 bool value_can_be_null) {
5128 NearLabel is_null;
5129 if (value_can_be_null) {
5130 __ testl(value, value);
5131 __ j(kEqual, &is_null);
5132 }
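// Compute the card address as card table base (from TLS) + (object >> kCardShift) and
// store the low byte of the biased card table base, which is arranged to equal the
// dirty-card marker (see gc::accounting::CardTable).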
5133 __ gs()->movq(card, Address::Absolute(Thread::CardTableOffset<kX86_64PointerSize>().Int32Value(),
5134 /* no_rip */ true));
5135 __ movq(temp, object);
5136 __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift));
5137 __ movb(Address(temp, card, TIMES_1, 0), card);
5138 if (value_can_be_null) {
5139 __ Bind(&is_null);
5140 }
5141 }
5142
5143 void LocationsBuilderX86_64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
5144 LOG(FATAL) << "Unimplemented";
5145 }
5146
5147 void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instruction) {
5148 if (instruction->GetNext()->IsSuspendCheck() &&
5149 instruction->GetBlock()->GetLoopInformation() != nullptr) {
5150 HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
5151 // The back edge will generate the suspend check.
5152 codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
5153 }
5154
5155 codegen_->GetMoveResolver()->EmitNativeCode(instruction);
5156 }
5157
5158 void LocationsBuilderX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
5159 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5160 instruction, LocationSummary::kCallOnSlowPath);
5161 // In suspend check slow path, usually there are no caller-save registers at all.
5162 // If SIMD instructions are present, however, we force spilling all live SIMD
5163 // registers in full width (since the runtime only saves/restores lower part).
5164 locations->SetCustomSlowPathCallerSaves(
5165 GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
5166 }
5167
5168 void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
5169 HBasicBlock* block = instruction->GetBlock();
5170 if (block->GetLoopInformation() != nullptr) {
5171 DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
5172 // The back edge will generate the suspend check.
5173 return;
5174 }
5175 if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
5176 // The goto will generate the suspend check.
5177 return;
5178 }
5179 GenerateSuspendCheck(instruction, nullptr);
5180 }
5181
5182 void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruction,
5183 HBasicBlock* successor) {
5184 SuspendCheckSlowPathX86_64* slow_path =
5185 down_cast<SuspendCheckSlowPathX86_64*>(instruction->GetSlowPath());
5186 if (slow_path == nullptr) {
5187 slow_path =
5188 new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathX86_64(instruction, successor);
5189 instruction->SetSlowPath(slow_path);
5190 codegen_->AddSlowPath(slow_path);
5191 if (successor != nullptr) {
5192 DCHECK(successor->IsLoopHeader());
5193 }
5194 } else {
5195 DCHECK_EQ(slow_path->GetSuccessor(), successor);
5196 }
5197
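// Poll the thread's flag word: any pending suspend request or checkpoint makes it
// non-zero and routes execution to the suspend-check slow path.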
5198 __ gs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64PointerSize>().Int32Value(),
5199 /* no_rip */ true),
5200 Immediate(0));
5201 if (successor == nullptr) {
5202 __ j(kNotEqual, slow_path->GetEntryLabel());
5203 __ Bind(slow_path->GetReturnLabel());
5204 } else {
5205 __ j(kEqual, codegen_->GetLabelOf(successor));
5206 __ jmp(slow_path->GetEntryLabel());
5207 }
5208 }
5209
5210 X86_64Assembler* ParallelMoveResolverX86_64::GetAssembler() const {
5211 return codegen_->GetAssembler();
5212 }
5213
5214 void ParallelMoveResolverX86_64::EmitMove(size_t index) {
5215 MoveOperands* move = moves_[index];
5216 Location source = move->GetSource();
5217 Location destination = move->GetDestination();
5218
5219 if (source.IsRegister()) {
5220 if (destination.IsRegister()) {
5221 __ movq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
5222 } else if (destination.IsStackSlot()) {
5223 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
5224 source.AsRegister<CpuRegister>());
5225 } else {
5226 DCHECK(destination.IsDoubleStackSlot());
5227 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
5228 source.AsRegister<CpuRegister>());
5229 }
5230 } else if (source.IsStackSlot()) {
5231 if (destination.IsRegister()) {
5232 __ movl(destination.AsRegister<CpuRegister>(),
5233 Address(CpuRegister(RSP), source.GetStackIndex()));
5234 } else if (destination.IsFpuRegister()) {
5235 __ movss(destination.AsFpuRegister<XmmRegister>(),
5236 Address(CpuRegister(RSP), source.GetStackIndex()));
5237 } else {
5238 DCHECK(destination.IsStackSlot());
5239 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
5240 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
5241 }
5242 } else if (source.IsDoubleStackSlot()) {
5243 if (destination.IsRegister()) {
5244 __ movq(destination.AsRegister<CpuRegister>(),
5245 Address(CpuRegister(RSP), source.GetStackIndex()));
5246 } else if (destination.IsFpuRegister()) {
5247 __ movsd(destination.AsFpuRegister<XmmRegister>(),
5248 Address(CpuRegister(RSP), source.GetStackIndex()));
5249 } else {
5250 DCHECK(destination.IsDoubleStackSlot()) << destination;
5251 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
5252 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
5253 }
5254 } else if (source.IsSIMDStackSlot()) {
5255 if (destination.IsFpuRegister()) {
5256 __ movups(destination.AsFpuRegister<XmmRegister>(),
5257 Address(CpuRegister(RSP), source.GetStackIndex()));
5258 } else {
5259 DCHECK(destination.IsSIMDStackSlot());
5260 size_t high = kX86_64WordSize;
5261 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
5262 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
5263 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex() + high));
5264 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex() + high), CpuRegister(TMP));
5265 }
5266 } else if (source.IsConstant()) {
5267 HConstant* constant = source.GetConstant();
5268 if (constant->IsIntConstant() || constant->IsNullConstant()) {
5269 int32_t value = CodeGenerator::GetInt32ValueOf(constant);
5270 if (destination.IsRegister()) {
5271 if (value == 0) {
5272 __ xorl(destination.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
5273 } else {
5274 __ movl(destination.AsRegister<CpuRegister>(), Immediate(value));
5275 }
5276 } else {
5277 DCHECK(destination.IsStackSlot()) << destination;
5278 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
5279 }
5280 } else if (constant->IsLongConstant()) {
5281 int64_t value = constant->AsLongConstant()->GetValue();
5282 if (destination.IsRegister()) {
5283 codegen_->Load64BitValue(destination.AsRegister<CpuRegister>(), value);
5284 } else {
5285 DCHECK(destination.IsDoubleStackSlot()) << destination;
5286 codegen_->Store64BitValueToStack(destination, value);
5287 }
5288 } else if (constant->IsFloatConstant()) {
5289 float fp_value = constant->AsFloatConstant()->GetValue();
5290 if (destination.IsFpuRegister()) {
5291 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
5292 codegen_->Load32BitValue(dest, fp_value);
5293 } else {
5294 DCHECK(destination.IsStackSlot()) << destination;
5295 Immediate imm(bit_cast<int32_t, float>(fp_value));
5296 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
5297 }
5298 } else {
5299 DCHECK(constant->IsDoubleConstant()) << constant->DebugName();
5300 double fp_value = constant->AsDoubleConstant()->GetValue();
5301 int64_t value = bit_cast<int64_t, double>(fp_value);
5302 if (destination.IsFpuRegister()) {
5303 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
5304 codegen_->Load64BitValue(dest, fp_value);
5305 } else {
5306 DCHECK(destination.IsDoubleStackSlot()) << destination;
5307 codegen_->Store64BitValueToStack(destination, value);
5308 }
5309 }
5310 } else if (source.IsFpuRegister()) {
5311 if (destination.IsFpuRegister()) {
5312 __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
5313 } else if (destination.IsStackSlot()) {
5314 __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
5315 source.AsFpuRegister<XmmRegister>());
5316 } else if (destination.IsDoubleStackSlot()) {
5317 __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
5318 source.AsFpuRegister<XmmRegister>());
5319 } else {
5320 DCHECK(destination.IsSIMDStackSlot());
5321 __ movups(Address(CpuRegister(RSP), destination.GetStackIndex()),
5322 source.AsFpuRegister<XmmRegister>());
5323 }
5324 }
5325 }
5326
5327 void ParallelMoveResolverX86_64::Exchange32(CpuRegister reg, int mem) {
5328 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5329 __ movl(Address(CpuRegister(RSP), mem), reg);
5330 __ movl(reg, CpuRegister(TMP));
5331 }
5332
5333 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg1, CpuRegister reg2) {
5334 __ movq(CpuRegister(TMP), reg1);
5335 __ movq(reg1, reg2);
5336 __ movq(reg2, CpuRegister(TMP));
5337 }
5338
5339 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) {
5340 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5341 __ movq(Address(CpuRegister(RSP), mem), reg);
5342 __ movq(reg, CpuRegister(TMP));
5343 }
5344
5345 void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) {
5346 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5347 __ movss(Address(CpuRegister(RSP), mem), reg);
5348 __ movd(reg, CpuRegister(TMP));
5349 }
5350
5351 void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) {
5352 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5353 __ movsd(Address(CpuRegister(RSP), mem), reg);
5354 __ movd(reg, CpuRegister(TMP));
5355 }
5356
5357 void ParallelMoveResolverX86_64::Exchange128(XmmRegister reg, int mem) {
5358 size_t extra_slot = 2 * kX86_64WordSize;
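// Spill the XMM register into two freshly allocated quad-word stack slots, swap the
// memory operands (now shifted by extra_slot), then reload the register and release the
// scratch area.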
5359 __ subq(CpuRegister(RSP), Immediate(extra_slot));
5360 __ movups(Address(CpuRegister(RSP), 0), XmmRegister(reg));
5361 ExchangeMemory64(0, mem + extra_slot, 2);
5362 __ movups(XmmRegister(reg), Address(CpuRegister(RSP), 0));
5363 __ addq(CpuRegister(RSP), Immediate(extra_slot));
5364 }
5365
5366 void ParallelMoveResolverX86_64::ExchangeMemory32(int mem1, int mem2) {
5367 ScratchRegisterScope ensure_scratch(
5368 this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
5369
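// If every core register is live, the scratch register gets spilled onto the stack,
// which shifts both memory operands by one word; stack_offset compensates for that.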
5370 int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
5371 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
5372 __ movl(CpuRegister(ensure_scratch.GetRegister()),
5373 Address(CpuRegister(RSP), mem2 + stack_offset));
5374 __ movl(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
5375 __ movl(Address(CpuRegister(RSP), mem1 + stack_offset),
5376 CpuRegister(ensure_scratch.GetRegister()));
5377 }
5378
5379 void ParallelMoveResolverX86_64::ExchangeMemory64(int mem1, int mem2, int num_of_qwords) {
5380 ScratchRegisterScope ensure_scratch(
5381 this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
5382
5383 int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
5384
5385 // Now that temp registers are available (possibly spilled), exchange blocks of memory.
5386 for (int i = 0; i < num_of_qwords; i++) {
5387 __ movq(CpuRegister(TMP),
5388 Address(CpuRegister(RSP), mem1 + stack_offset));
5389 __ movq(CpuRegister(ensure_scratch.GetRegister()),
5390 Address(CpuRegister(RSP), mem2 + stack_offset));
5391 __ movq(Address(CpuRegister(RSP), mem2 + stack_offset),
5392 CpuRegister(TMP));
5393 __ movq(Address(CpuRegister(RSP), mem1 + stack_offset),
5394 CpuRegister(ensure_scratch.GetRegister()));
5395 stack_offset += kX86_64WordSize;
5396 }
5397 }
5398
5399 void ParallelMoveResolverX86_64::EmitSwap(size_t index) {
5400 MoveOperands* move = moves_[index];
5401 Location source = move->GetSource();
5402 Location destination = move->GetDestination();
5403
5404 if (source.IsRegister() && destination.IsRegister()) {
5405 Exchange64(source.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
5406 } else if (source.IsRegister() && destination.IsStackSlot()) {
5407 Exchange32(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
5408 } else if (source.IsStackSlot() && destination.IsRegister()) {
5409 Exchange32(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
5410 } else if (source.IsStackSlot() && destination.IsStackSlot()) {
5411 ExchangeMemory32(destination.GetStackIndex(), source.GetStackIndex());
5412 } else if (source.IsRegister() && destination.IsDoubleStackSlot()) {
5413 Exchange64(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
5414 } else if (source.IsDoubleStackSlot() && destination.IsRegister()) {
5415 Exchange64(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
5416 } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
5417 ExchangeMemory64(destination.GetStackIndex(), source.GetStackIndex(), 1);
5418 } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
5419 __ movd(CpuRegister(TMP), source.AsFpuRegister<XmmRegister>());
5420 __ movaps(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>());
5421 __ movd(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP));
5422 } else if (source.IsFpuRegister() && destination.IsStackSlot()) {
5423 Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
5424 } else if (source.IsStackSlot() && destination.IsFpuRegister()) {
5425 Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
5426 } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) {
5427 Exchange64(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
5428 } else if (source.IsDoubleStackSlot() && destination.IsFpuRegister()) {
5429 Exchange64(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
5430 } else if (source.IsSIMDStackSlot() && destination.IsSIMDStackSlot()) {
5431 ExchangeMemory64(destination.GetStackIndex(), source.GetStackIndex(), 2);
5432 } else if (source.IsFpuRegister() && destination.IsSIMDStackSlot()) {
5433 Exchange128(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
5434 } else if (destination.IsFpuRegister() && source.IsSIMDStackSlot()) {
5435 Exchange128(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
5436 } else {
5437 LOG(FATAL) << "Unimplemented swap between " << source << " and " << destination;
5438 }
5439 }
5440
5441
5442 void ParallelMoveResolverX86_64::SpillScratch(int reg) {
5443 __ pushq(CpuRegister(reg));
5444 }
5445
5446
5447 void ParallelMoveResolverX86_64::RestoreScratch(int reg) {
5448 __ popq(CpuRegister(reg));
5449 }
5450
5451 void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck(
5452 SlowPathCode* slow_path, CpuRegister class_reg) {
5453 constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
5454 const size_t status_byte_offset =
5455 mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
5456 constexpr uint32_t shifted_initialized_value =
5457 enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte);
5458
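// The initialization status is packed above the SubtypeCheck bits in the class'
// status word; compare the containing byte against the shifted kInitialized value and
// take the slow path while the class is not yet fully initialized.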
5459 __ cmpb(Address(class_reg, status_byte_offset), Immediate(shifted_initialized_value));
5460 __ j(kBelow, slow_path->GetEntryLabel());
5461 __ Bind(slow_path->GetExitLabel());
5462 // No need for memory fence, thanks to the x86-64 memory model.
5463 }
5464
5465 HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind(
5466 HLoadClass::LoadKind desired_class_load_kind) {
5467 switch (desired_class_load_kind) {
5468 case HLoadClass::LoadKind::kInvalid:
5469 LOG(FATAL) << "UNREACHABLE";
5470 UNREACHABLE();
5471 case HLoadClass::LoadKind::kReferrersClass:
5472 break;
5473 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
5474 case HLoadClass::LoadKind::kBootImageClassTable:
5475 case HLoadClass::LoadKind::kBssEntry:
5476 DCHECK(!Runtime::Current()->UseJitCompilation());
5477 break;
5478 case HLoadClass::LoadKind::kJitTableAddress:
5479 DCHECK(Runtime::Current()->UseJitCompilation());
5480 break;
5481 case HLoadClass::LoadKind::kBootImageAddress:
5482 case HLoadClass::LoadKind::kRuntimeCall:
5483 break;
5484 }
5485 return desired_class_load_kind;
5486 }
5487
5488 void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
5489 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
5490 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
5491 // Custom calling convention: RAX serves as both input and output.
5492 CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
5493 cls,
5494 Location::RegisterLocation(RAX),
5495 Location::RegisterLocation(RAX));
5496 return;
5497 }
5498 DCHECK(!cls->NeedsAccessCheck());
5499
5500 const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
5501 LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
5502 ? LocationSummary::kCallOnSlowPath
5503 : LocationSummary::kNoCall;
5504 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind);
5505 if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
5506 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
5507 }
5508
5509 if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
5510 locations->SetInAt(0, Location::RequiresRegister());
5511 }
5512 locations->SetOut(Location::RequiresRegister());
5513 if (load_kind == HLoadClass::LoadKind::kBssEntry) {
5514 if (!kUseReadBarrier || kUseBakerReadBarrier) {
5515 // Rely on the type resolution and/or initialization to save everything.
5516 // Custom calling convention: RAX serves as both input and output.
5517 RegisterSet caller_saves = RegisterSet::Empty();
5518 caller_saves.Add(Location::RegisterLocation(RAX));
5519 locations->SetCustomSlowPathCallerSaves(caller_saves);
5520 } else {
5521 // For non-Baker read barrier we have a temp-clobbering call.
5522 }
5523 }
5524 }
5525
5526 Label* CodeGeneratorX86_64::NewJitRootClassPatch(const DexFile& dex_file,
5527 dex::TypeIndex type_index,
5528 Handle<mirror::Class> handle) {
5529 ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
5530 // Add a patch entry and return the label.
5531 jit_class_patches_.emplace_back(&dex_file, type_index.index_);
5532 PatchInfo<Label>* info = &jit_class_patches_.back();
5533 return &info->label;
5534 }
5535
5536 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
5537 // move.
VisitLoadClass(HLoadClass * cls)5538 void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
5539 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
5540 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
5541 codegen_->GenerateLoadClassRuntimeCall(cls);
5542 return;
5543 }
5544 DCHECK(!cls->NeedsAccessCheck());
5545
5546 LocationSummary* locations = cls->GetLocations();
5547 Location out_loc = locations->Out();
5548 CpuRegister out = out_loc.AsRegister<CpuRegister>();
5549
5550 const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
5551 ? kWithoutReadBarrier
5552 : kCompilerReadBarrierOption;
5553 bool generate_null_check = false;
5554 switch (load_kind) {
5555 case HLoadClass::LoadKind::kReferrersClass: {
5556 DCHECK(!cls->CanCallRuntime());
5557 DCHECK(!cls->MustGenerateClinitCheck());
5558 // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
5559 CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
5560 GenerateGcRootFieldLoad(
5561 cls,
5562 out_loc,
5563 Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()),
5564 /* fixup_label */ nullptr,
5565 read_barrier_option);
5566 break;
5567 }
5568 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
5569 DCHECK(codegen_->GetCompilerOptions().IsBootImage());
5570 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
5571 __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false));
5572 codegen_->RecordBootImageTypePatch(cls);
5573 break;
5574 case HLoadClass::LoadKind::kBootImageAddress: {
5575 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
5576 uint32_t address = dchecked_integral_cast<uint32_t>(
5577 reinterpret_cast<uintptr_t>(cls->GetClass().Get()));
5578 DCHECK_NE(address, 0u);
5579 __ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended.
5580 break;
5581 }
5582 case HLoadClass::LoadKind::kBootImageClassTable: {
5583 DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
5584 __ movl(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false));
5585 codegen_->RecordBootImageTypePatch(cls);
5586 // Extract the reference from the slot data, i.e. clear the hash bits.
5587 int32_t masked_hash = ClassTable::TableSlot::MaskHash(
5588 ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex())));
5589 if (masked_hash != 0) {
5590 __ subl(out, Immediate(masked_hash));
5591 }
5592 break;
5593 }
5594 case HLoadClass::LoadKind::kBssEntry: {
5595 Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
5596 /* no_rip */ false);
5597 Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls);
5598 // /* GcRoot<mirror::Class> */ out = *address /* PC-relative */
5599 GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
5600 generate_null_check = true;
5601 break;
5602 }
5603 case HLoadClass::LoadKind::kJitTableAddress: {
5604 Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
5605 /* no_rip */ true);
5606 Label* fixup_label =
5607 codegen_->NewJitRootClassPatch(cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass());
5608 // /* GcRoot<mirror::Class> */ out = *address
5609 GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
5610 break;
5611 }
5612 default:
5613 LOG(FATAL) << "Unexpected load kind: " << cls->GetLoadKind();
5614 UNREACHABLE();
5615 }
5616
5617 if (generate_null_check || cls->MustGenerateClinitCheck()) {
5618 DCHECK(cls->CanCallRuntime());
5619 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64(
5620 cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
5621 codegen_->AddSlowPath(slow_path);
5622 if (generate_null_check) {
5623 __ testl(out, out);
5624 __ j(kEqual, slow_path->GetEntryLabel());
5625 }
5626 if (cls->MustGenerateClinitCheck()) {
5627 GenerateClassInitializationCheck(slow_path, out);
5628 } else {
5629 __ Bind(slow_path->GetExitLabel());
5630 }
5631 }
5632 }
5633
VisitClinitCheck(HClinitCheck * check)5634 void LocationsBuilderX86_64::VisitClinitCheck(HClinitCheck* check) {
5635 LocationSummary* locations =
5636 new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
5637 locations->SetInAt(0, Location::RequiresRegister());
5638 if (check->HasUses()) {
5639 locations->SetOut(Location::SameAsFirstInput());
5640 }
5641 }
5642
VisitClinitCheck(HClinitCheck * check)5643 void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) {
5644   // We assume the class is not null.
5645 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64(
5646 check->GetLoadClass(), check, check->GetDexPc(), true);
5647 codegen_->AddSlowPath(slow_path);
5648 GenerateClassInitializationCheck(slow_path,
5649 check->GetLocations()->InAt(0).AsRegister<CpuRegister>());
5650 }
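
// Taken together with GenerateClassInitializationCheck above, the sequence
// emitted for an explicit clinit check is roughly (illustrative pseudo-code):
//
//   if (class->status_byte < shifted_initialized_value) {
//     LoadClassSlowPathX86_64();  // do_clinit == true: runs <clinit> if needed.
//   }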
5651
GetSupportedLoadStringKind(HLoadString::LoadKind desired_string_load_kind)5652 HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind(
5653 HLoadString::LoadKind desired_string_load_kind) {
5654 switch (desired_string_load_kind) {
5655 case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
5656 case HLoadString::LoadKind::kBootImageInternTable:
5657 case HLoadString::LoadKind::kBssEntry:
5658 DCHECK(!Runtime::Current()->UseJitCompilation());
5659 break;
5660 case HLoadString::LoadKind::kJitTableAddress:
5661 DCHECK(Runtime::Current()->UseJitCompilation());
5662 break;
5663 case HLoadString::LoadKind::kBootImageAddress:
5664 case HLoadString::LoadKind::kRuntimeCall:
5665 break;
5666 }
5667 return desired_string_load_kind;
5668 }
5669
VisitLoadString(HLoadString * load)5670 void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) {
5671 LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
5672 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
5673 if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) {
5674 locations->SetOut(Location::RegisterLocation(RAX));
5675 } else {
5676 locations->SetOut(Location::RequiresRegister());
5677 if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
5678 if (!kUseReadBarrier || kUseBakerReadBarrier) {
5679         // Rely on the pResolveString entrypoint to save everything.
5680 // Custom calling convention: RAX serves as both input and output.
5681 RegisterSet caller_saves = RegisterSet::Empty();
5682 caller_saves.Add(Location::RegisterLocation(RAX));
5683 locations->SetCustomSlowPathCallerSaves(caller_saves);
5684 } else {
5685 // For non-Baker read barrier we have a temp-clobbering call.
5686 }
5687 }
5688 }
5689 }
5690
NewJitRootStringPatch(const DexFile & dex_file,dex::StringIndex string_index,Handle<mirror::String> handle)5691 Label* CodeGeneratorX86_64::NewJitRootStringPatch(const DexFile& dex_file,
5692 dex::StringIndex string_index,
5693 Handle<mirror::String> handle) {
5694 ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
5695 // Add a patch entry and return the label.
5696 jit_string_patches_.emplace_back(&dex_file, string_index.index_);
5697 PatchInfo<Label>* info = &jit_string_patches_.back();
5698 return &info->label;
5699 }
5700
5701 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
5702 // move.
VisitLoadString(HLoadString * load)5703 void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
5704 LocationSummary* locations = load->GetLocations();
5705 Location out_loc = locations->Out();
5706 CpuRegister out = out_loc.AsRegister<CpuRegister>();
5707
5708 switch (load->GetLoadKind()) {
5709 case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
5710 DCHECK(codegen_->GetCompilerOptions().IsBootImage());
5711 __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false));
5712 codegen_->RecordBootImageStringPatch(load);
5713 return;
5714 }
5715 case HLoadString::LoadKind::kBootImageAddress: {
5716 uint32_t address = dchecked_integral_cast<uint32_t>(
5717 reinterpret_cast<uintptr_t>(load->GetString().Get()));
5718 DCHECK_NE(address, 0u);
5719 __ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended.
5720 return;
5721 }
5722 case HLoadString::LoadKind::kBootImageInternTable: {
5723 DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
5724 __ movl(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false));
5725 codegen_->RecordBootImageStringPatch(load);
5726 return;
5727 }
5728 case HLoadString::LoadKind::kBssEntry: {
5729 Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
5730 /* no_rip */ false);
5731 Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
5732       // /* GcRoot<mirror::String> */ out = *address /* PC-relative */
5733 GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
5734 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86_64(load);
5735 codegen_->AddSlowPath(slow_path);
5736 __ testl(out, out);
5737 __ j(kEqual, slow_path->GetEntryLabel());
5738 __ Bind(slow_path->GetExitLabel());
5739 return;
5740 }
5741 case HLoadString::LoadKind::kJitTableAddress: {
5742 Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
5743 /* no_rip */ true);
5744 Label* fixup_label = codegen_->NewJitRootStringPatch(
5745 load->GetDexFile(), load->GetStringIndex(), load->GetString());
5746 // /* GcRoot<mirror::String> */ out = *address
5747 GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
5748 return;
5749 }
5750 default:
5751 break;
5752 }
5753
5754   // TODO: Re-add the compiler code to do string dex cache lookup.
5755 // Custom calling convention: RAX serves as both input and output.
5756 __ movl(CpuRegister(RAX), Immediate(load->GetStringIndex().index_));
5757 codegen_->InvokeRuntime(kQuickResolveString,
5758 load,
5759 load->GetDexPc());
5760 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
5761 }
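
// Illustrative sketch of the kBssEntry path above (pseudo-code; the 32-bit
// displacement is a dummy that is later patched to point at the .bss string
// slot):
//
//   out = *string_bss_entry;             // PC-relative GC root load, possibly with a read barrier.
//   if (out == nullptr) {
//     out = LoadStringSlowPathX86_64();  // kQuickResolveString fills the .bss slot.
//   }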
5762
GetExceptionTlsAddress()5763 static Address GetExceptionTlsAddress() {
5764 return Address::Absolute(Thread::ExceptionOffset<kX86_64PointerSize>().Int32Value(),
5765 /* no_rip */ true);
5766 }
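
// The pending exception lives at a fixed offset inside the Thread object,
// which x86-64 reaches through the %gs segment register, so the two visitors
// below reduce to (illustrative pseudo-code; field naming is a placeholder):
//
//   out = *(gs_base + exception_offset);      // VisitLoadException
//   *(gs_base + exception_offset) = nullptr;  // VisitClearException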
5767
VisitLoadException(HLoadException * load)5768 void LocationsBuilderX86_64::VisitLoadException(HLoadException* load) {
5769 LocationSummary* locations =
5770 new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall);
5771 locations->SetOut(Location::RequiresRegister());
5772 }
5773
VisitLoadException(HLoadException * load)5774 void InstructionCodeGeneratorX86_64::VisitLoadException(HLoadException* load) {
5775 __ gs()->movl(load->GetLocations()->Out().AsRegister<CpuRegister>(), GetExceptionTlsAddress());
5776 }
5777
VisitClearException(HClearException * clear)5778 void LocationsBuilderX86_64::VisitClearException(HClearException* clear) {
5779 new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
5780 }
5781
VisitClearException(HClearException * clear ATTRIBUTE_UNUSED)5782 void InstructionCodeGeneratorX86_64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
5783 __ gs()->movl(GetExceptionTlsAddress(), Immediate(0));
5784 }
5785
VisitThrow(HThrow * instruction)5786 void LocationsBuilderX86_64::VisitThrow(HThrow* instruction) {
5787 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5788 instruction, LocationSummary::kCallOnMainOnly);
5789 InvokeRuntimeCallingConvention calling_convention;
5790 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
5791 }
5792
VisitThrow(HThrow * instruction)5793 void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) {
5794 codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
5795 CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
5796 }
5797
CheckCastTypeCheckNeedsATemporary(TypeCheckKind type_check_kind)5798 static bool CheckCastTypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
5799 if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
5800 // We need a temporary for holding the iftable length.
5801 return true;
5802 }
5803 return kEmitCompilerReadBarrier &&
5804 !kUseBakerReadBarrier &&
5805 (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
5806 type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
5807 type_check_kind == TypeCheckKind::kArrayObjectCheck);
5808 }
5809
InstanceOfTypeCheckNeedsATemporary(TypeCheckKind type_check_kind)5810 static bool InstanceOfTypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
5811 return kEmitCompilerReadBarrier &&
5812 !kUseBakerReadBarrier &&
5813 (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
5814 type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
5815 type_check_kind == TypeCheckKind::kArrayObjectCheck);
5816 }
5817
VisitInstanceOf(HInstanceOf * instruction)5818 void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
5819 LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
5820 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
5821 bool baker_read_barrier_slow_path = false;
5822 switch (type_check_kind) {
5823 case TypeCheckKind::kExactCheck:
5824 case TypeCheckKind::kAbstractClassCheck:
5825 case TypeCheckKind::kClassHierarchyCheck:
5826 case TypeCheckKind::kArrayObjectCheck: {
5827 bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction);
5828 call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
5829 baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier;
5830 break;
5831 }
5832 case TypeCheckKind::kArrayCheck:
5833 case TypeCheckKind::kUnresolvedCheck:
5834 case TypeCheckKind::kInterfaceCheck:
5835 call_kind = LocationSummary::kCallOnSlowPath;
5836 break;
5837 }
5838
5839 LocationSummary* locations =
5840 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
5841 if (baker_read_barrier_slow_path) {
5842 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
5843 }
5844 locations->SetInAt(0, Location::RequiresRegister());
5845 locations->SetInAt(1, Location::Any());
5846 // Note that TypeCheckSlowPathX86_64 uses this "out" register too.
5847 locations->SetOut(Location::RequiresRegister());
5848 // When read barriers are enabled, we need a temporary register for
5849 // some cases.
5850 if (InstanceOfTypeCheckNeedsATemporary(type_check_kind)) {
5851 locations->AddTemp(Location::RequiresRegister());
5852 }
5853 }
5854
VisitInstanceOf(HInstanceOf * instruction)5855 void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
5856 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
5857 LocationSummary* locations = instruction->GetLocations();
5858 Location obj_loc = locations->InAt(0);
5859 CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
5860 Location cls = locations->InAt(1);
5861 Location out_loc = locations->Out();
5862 CpuRegister out = out_loc.AsRegister<CpuRegister>();
5863 Location maybe_temp_loc = InstanceOfTypeCheckNeedsATemporary(type_check_kind) ?
5864 locations->GetTemp(0) :
5865 Location::NoLocation();
5866 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
5867 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
5868 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
5869 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
5870 SlowPathCode* slow_path = nullptr;
5871 NearLabel done, zero;
5872
5873 // Return 0 if `obj` is null.
5874 // Avoid null check if we know obj is not null.
5875 if (instruction->MustDoNullCheck()) {
5876 __ testl(obj, obj);
5877 __ j(kEqual, &zero);
5878 }
5879
5880 switch (type_check_kind) {
5881 case TypeCheckKind::kExactCheck: {
5882 ReadBarrierOption read_barrier_option =
5883 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
5884 // /* HeapReference<Class> */ out = obj->klass_
5885 GenerateReferenceLoadTwoRegisters(instruction,
5886 out_loc,
5887 obj_loc,
5888 class_offset,
5889 read_barrier_option);
5890 if (cls.IsRegister()) {
5891 __ cmpl(out, cls.AsRegister<CpuRegister>());
5892 } else {
5893 DCHECK(cls.IsStackSlot()) << cls;
5894 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
5895 }
5896 if (zero.IsLinked()) {
5897 // Classes must be equal for the instanceof to succeed.
5898 __ j(kNotEqual, &zero);
5899 __ movl(out, Immediate(1));
5900 __ jmp(&done);
5901 } else {
5902 __ setcc(kEqual, out);
5903 // setcc only sets the low byte.
5904 __ andl(out, Immediate(1));
5905 }
5906 break;
5907 }
5908
5909 case TypeCheckKind::kAbstractClassCheck: {
5910 ReadBarrierOption read_barrier_option =
5911 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
5912 // /* HeapReference<Class> */ out = obj->klass_
5913 GenerateReferenceLoadTwoRegisters(instruction,
5914 out_loc,
5915 obj_loc,
5916 class_offset,
5917 read_barrier_option);
5918 // If the class is abstract, we eagerly fetch the super class of the
5919 // object to avoid doing a comparison we know will fail.
5920 NearLabel loop, success;
5921 __ Bind(&loop);
5922 // /* HeapReference<Class> */ out = out->super_class_
5923 GenerateReferenceLoadOneRegister(instruction,
5924 out_loc,
5925 super_offset,
5926 maybe_temp_loc,
5927 read_barrier_option);
5928 __ testl(out, out);
5929 // If `out` is null, we use it for the result, and jump to `done`.
5930 __ j(kEqual, &done);
5931 if (cls.IsRegister()) {
5932 __ cmpl(out, cls.AsRegister<CpuRegister>());
5933 } else {
5934 DCHECK(cls.IsStackSlot()) << cls;
5935 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
5936 }
5937 __ j(kNotEqual, &loop);
5938 __ movl(out, Immediate(1));
5939 if (zero.IsLinked()) {
5940 __ jmp(&done);
5941 }
5942 break;
5943 }
5944
5945 case TypeCheckKind::kClassHierarchyCheck: {
5946 ReadBarrierOption read_barrier_option =
5947 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
5948 // /* HeapReference<Class> */ out = obj->klass_
5949 GenerateReferenceLoadTwoRegisters(instruction,
5950 out_loc,
5951 obj_loc,
5952 class_offset,
5953 read_barrier_option);
5954 // Walk over the class hierarchy to find a match.
5955 NearLabel loop, success;
5956 __ Bind(&loop);
5957 if (cls.IsRegister()) {
5958 __ cmpl(out, cls.AsRegister<CpuRegister>());
5959 } else {
5960 DCHECK(cls.IsStackSlot()) << cls;
5961 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
5962 }
5963 __ j(kEqual, &success);
5964 // /* HeapReference<Class> */ out = out->super_class_
5965 GenerateReferenceLoadOneRegister(instruction,
5966 out_loc,
5967 super_offset,
5968 maybe_temp_loc,
5969 read_barrier_option);
5970 __ testl(out, out);
5971 __ j(kNotEqual, &loop);
5972 // If `out` is null, we use it for the result, and jump to `done`.
5973 __ jmp(&done);
5974 __ Bind(&success);
5975 __ movl(out, Immediate(1));
5976 if (zero.IsLinked()) {
5977 __ jmp(&done);
5978 }
5979 break;
5980 }
5981
5982 case TypeCheckKind::kArrayObjectCheck: {
5983 ReadBarrierOption read_barrier_option =
5984 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
5985 // /* HeapReference<Class> */ out = obj->klass_
5986 GenerateReferenceLoadTwoRegisters(instruction,
5987 out_loc,
5988 obj_loc,
5989 class_offset,
5990 read_barrier_option);
5991 // Do an exact check.
5992 NearLabel exact_check;
5993 if (cls.IsRegister()) {
5994 __ cmpl(out, cls.AsRegister<CpuRegister>());
5995 } else {
5996 DCHECK(cls.IsStackSlot()) << cls;
5997 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
5998 }
5999 __ j(kEqual, &exact_check);
6000 // Otherwise, we need to check that the object's class is a non-primitive array.
6001 // /* HeapReference<Class> */ out = out->component_type_
6002 GenerateReferenceLoadOneRegister(instruction,
6003 out_loc,
6004 component_offset,
6005 maybe_temp_loc,
6006 read_barrier_option);
6007 __ testl(out, out);
6008 // If `out` is null, we use it for the result, and jump to `done`.
6009 __ j(kEqual, &done);
6010 __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot));
6011 __ j(kNotEqual, &zero);
6012 __ Bind(&exact_check);
6013 __ movl(out, Immediate(1));
6014 __ jmp(&done);
6015 break;
6016 }
6017
6018 case TypeCheckKind::kArrayCheck: {
6019 // No read barrier since the slow path will retry upon failure.
6020 // /* HeapReference<Class> */ out = obj->klass_
6021 GenerateReferenceLoadTwoRegisters(instruction,
6022 out_loc,
6023 obj_loc,
6024 class_offset,
6025 kWithoutReadBarrier);
6026 if (cls.IsRegister()) {
6027 __ cmpl(out, cls.AsRegister<CpuRegister>());
6028 } else {
6029 DCHECK(cls.IsStackSlot()) << cls;
6030 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
6031 }
6032 DCHECK(locations->OnlyCallsOnSlowPath());
6033 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
6034 instruction, /* is_fatal */ false);
6035 codegen_->AddSlowPath(slow_path);
6036 __ j(kNotEqual, slow_path->GetEntryLabel());
6037 __ movl(out, Immediate(1));
6038 if (zero.IsLinked()) {
6039 __ jmp(&done);
6040 }
6041 break;
6042 }
6043
6044 case TypeCheckKind::kUnresolvedCheck:
6045 case TypeCheckKind::kInterfaceCheck: {
6046 // Note that we indeed only call on slow path, but we always go
6047 // into the slow path for the unresolved and interface check
6048 // cases.
6049 //
6050 // We cannot directly call the InstanceofNonTrivial runtime
6051 // entry point without resorting to a type checking slow path
6052 // here (i.e. by calling InvokeRuntime directly), as it would
6053 // require to assign fixed registers for the inputs of this
6054 // HInstanceOf instruction (following the runtime calling
6055 // convention), which might be cluttered by the potential first
6056 // read barrier emission at the beginning of this method.
6057 //
6058 // TODO: Introduce a new runtime entry point taking the object
6059 // to test (instead of its class) as argument, and let it deal
6060 // with the read barrier issues. This will let us refactor this
6061 // case of the `switch` code as it was previously (with a direct
6062 // call to the runtime not using a type checking slow path).
6063 // This should also be beneficial for the other cases above.
6064 DCHECK(locations->OnlyCallsOnSlowPath());
6065 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
6066 instruction, /* is_fatal */ false);
6067 codegen_->AddSlowPath(slow_path);
6068 __ jmp(slow_path->GetEntryLabel());
6069 if (zero.IsLinked()) {
6070 __ jmp(&done);
6071 }
6072 break;
6073 }
6074 }
6075
6076 if (zero.IsLinked()) {
6077 __ Bind(&zero);
6078 __ xorl(out, out);
6079 }
6080
6081 if (done.IsLinked()) {
6082 __ Bind(&done);
6083 }
6084
6085 if (slow_path != nullptr) {
6086 __ Bind(slow_path->GetExitLabel());
6087 }
6088 }
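
// Illustrative sketch of the kExactCheck fast path above when the null check
// is not needed and no read barrier is required (pseudo-code):
//
//   out = obj->klass_;           // GenerateReferenceLoadTwoRegisters
//   out = (out == cls) ? 1 : 0;  // cmpl + setcc + andl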
6089
VisitCheckCast(HCheckCast * instruction)6090 void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) {
6091 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6092 LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction);
6093 LocationSummary* locations =
6094 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
6095 locations->SetInAt(0, Location::RequiresRegister());
6096 if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
6097 // Require a register for the interface check since there is a loop that compares the class to
6098 // a memory address.
6099 locations->SetInAt(1, Location::RequiresRegister());
6100 } else {
6101 locations->SetInAt(1, Location::Any());
6102 }
6103
6104 // Note that TypeCheckSlowPathX86_64 uses this "temp" register too.
6105 locations->AddTemp(Location::RequiresRegister());
6106 // When read barriers are enabled, we need an additional temporary
6107 // register for some cases.
6108 if (CheckCastTypeCheckNeedsATemporary(type_check_kind)) {
6109 locations->AddTemp(Location::RequiresRegister());
6110 }
6111 }
6112
VisitCheckCast(HCheckCast * instruction)6113 void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
6114 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6115 LocationSummary* locations = instruction->GetLocations();
6116 Location obj_loc = locations->InAt(0);
6117 CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
6118 Location cls = locations->InAt(1);
6119 Location temp_loc = locations->GetTemp(0);
6120 CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
6121 Location maybe_temp2_loc = CheckCastTypeCheckNeedsATemporary(type_check_kind) ?
6122 locations->GetTemp(1) :
6123 Location::NoLocation();
6124 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
6125 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
6126 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
6127 const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
6128 const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
6129 const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
6130 const uint32_t object_array_data_offset =
6131 mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
6132
6133 bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction);
6134 SlowPathCode* type_check_slow_path =
6135 new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
6136 instruction, is_type_check_slow_path_fatal);
6137 codegen_->AddSlowPath(type_check_slow_path);
6138
6139
6140 NearLabel done;
6141 // Avoid null check if we know obj is not null.
6142 if (instruction->MustDoNullCheck()) {
6143 __ testl(obj, obj);
6144 __ j(kEqual, &done);
6145 }
6146
6147 switch (type_check_kind) {
6148 case TypeCheckKind::kExactCheck:
6149 case TypeCheckKind::kArrayCheck: {
6150 // /* HeapReference<Class> */ temp = obj->klass_
6151 GenerateReferenceLoadTwoRegisters(instruction,
6152 temp_loc,
6153 obj_loc,
6154 class_offset,
6155 kWithoutReadBarrier);
6156 if (cls.IsRegister()) {
6157 __ cmpl(temp, cls.AsRegister<CpuRegister>());
6158 } else {
6159 DCHECK(cls.IsStackSlot()) << cls;
6160 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
6161 }
6162 // Jump to slow path for throwing the exception or doing a
6163 // more involved array check.
6164 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
6165 break;
6166 }
6167
6168 case TypeCheckKind::kAbstractClassCheck: {
6169 // /* HeapReference<Class> */ temp = obj->klass_
6170 GenerateReferenceLoadTwoRegisters(instruction,
6171 temp_loc,
6172 obj_loc,
6173 class_offset,
6174 kWithoutReadBarrier);
6175 // If the class is abstract, we eagerly fetch the super class of the
6176 // object to avoid doing a comparison we know will fail.
6177 NearLabel loop;
6178 __ Bind(&loop);
6179 // /* HeapReference<Class> */ temp = temp->super_class_
6180 GenerateReferenceLoadOneRegister(instruction,
6181 temp_loc,
6182 super_offset,
6183 maybe_temp2_loc,
6184 kWithoutReadBarrier);
6185
6186 // If the class reference currently in `temp` is null, jump to the slow path to throw the
6187 // exception.
6188 __ testl(temp, temp);
6189 // Otherwise, compare the classes.
6190 __ j(kZero, type_check_slow_path->GetEntryLabel());
6191 if (cls.IsRegister()) {
6192 __ cmpl(temp, cls.AsRegister<CpuRegister>());
6193 } else {
6194 DCHECK(cls.IsStackSlot()) << cls;
6195 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
6196 }
6197 __ j(kNotEqual, &loop);
6198 break;
6199 }
6200
6201 case TypeCheckKind::kClassHierarchyCheck: {
6202 // /* HeapReference<Class> */ temp = obj->klass_
6203 GenerateReferenceLoadTwoRegisters(instruction,
6204 temp_loc,
6205 obj_loc,
6206 class_offset,
6207 kWithoutReadBarrier);
6208 // Walk over the class hierarchy to find a match.
6209 NearLabel loop;
6210 __ Bind(&loop);
6211 if (cls.IsRegister()) {
6212 __ cmpl(temp, cls.AsRegister<CpuRegister>());
6213 } else {
6214 DCHECK(cls.IsStackSlot()) << cls;
6215 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
6216 }
6217 __ j(kEqual, &done);
6218
6219 // /* HeapReference<Class> */ temp = temp->super_class_
6220 GenerateReferenceLoadOneRegister(instruction,
6221 temp_loc,
6222 super_offset,
6223 maybe_temp2_loc,
6224 kWithoutReadBarrier);
6225
6226 // If the class reference currently in `temp` is not null, jump
6227       // back to the beginning of the loop.
6228 __ testl(temp, temp);
6229 __ j(kNotZero, &loop);
6230 // Otherwise, jump to the slow path to throw the exception.
6231 __ jmp(type_check_slow_path->GetEntryLabel());
6232 break;
6233 }
6234
6235 case TypeCheckKind::kArrayObjectCheck: {
6236 // /* HeapReference<Class> */ temp = obj->klass_
6237 GenerateReferenceLoadTwoRegisters(instruction,
6238 temp_loc,
6239 obj_loc,
6240 class_offset,
6241 kWithoutReadBarrier);
6242 // Do an exact check.
6243 NearLabel check_non_primitive_component_type;
6244 if (cls.IsRegister()) {
6245 __ cmpl(temp, cls.AsRegister<CpuRegister>());
6246 } else {
6247 DCHECK(cls.IsStackSlot()) << cls;
6248 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
6249 }
6250 __ j(kEqual, &done);
6251
6252 // Otherwise, we need to check that the object's class is a non-primitive array.
6253 // /* HeapReference<Class> */ temp = temp->component_type_
6254 GenerateReferenceLoadOneRegister(instruction,
6255 temp_loc,
6256 component_offset,
6257 maybe_temp2_loc,
6258 kWithoutReadBarrier);
6259
6260 // If the component type is not null (i.e. the object is indeed
6261 // an array), jump to label `check_non_primitive_component_type`
6262 // to further check that this component type is not a primitive
6263 // type.
6264 __ testl(temp, temp);
6265 // Otherwise, jump to the slow path to throw the exception.
6266 __ j(kZero, type_check_slow_path->GetEntryLabel());
6267 __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
6268 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
6269 break;
6270 }
6271
6272 case TypeCheckKind::kUnresolvedCheck: {
6273 // We always go into the type check slow path for the unresolved case.
6274 //
6275 // We cannot directly call the CheckCast runtime entry point
6276 // without resorting to a type checking slow path here (i.e. by
6277 // calling InvokeRuntime directly), as it would require to
6278 // assign fixed registers for the inputs of this HInstanceOf
6279 // instruction (following the runtime calling convention), which
6280 // might be cluttered by the potential first read barrier
6281 // emission at the beginning of this method.
6282 __ jmp(type_check_slow_path->GetEntryLabel());
6283 break;
6284 }
6285
6286 case TypeCheckKind::kInterfaceCheck:
6287 // Fast path for the interface check. Try to avoid read barriers to improve the fast path.
6288   // We cannot get false positives by doing this.
6289 // /* HeapReference<Class> */ temp = obj->klass_
6290 GenerateReferenceLoadTwoRegisters(instruction,
6291 temp_loc,
6292 obj_loc,
6293 class_offset,
6294 kWithoutReadBarrier);
6295
6296 // /* HeapReference<Class> */ temp = temp->iftable_
6297 GenerateReferenceLoadTwoRegisters(instruction,
6298 temp_loc,
6299 temp_loc,
6300 iftable_offset,
6301 kWithoutReadBarrier);
6302 // Iftable is never null.
6303 __ movl(maybe_temp2_loc.AsRegister<CpuRegister>(), Address(temp, array_length_offset));
6304 // Maybe poison the `cls` for direct comparison with memory.
6305 __ MaybePoisonHeapReference(cls.AsRegister<CpuRegister>());
6306 // Loop through the iftable and check if any class matches.
6307 NearLabel start_loop;
6308 __ Bind(&start_loop);
6309 // Need to subtract first to handle the empty array case.
6310 __ subl(maybe_temp2_loc.AsRegister<CpuRegister>(), Immediate(2));
6311 __ j(kNegative, type_check_slow_path->GetEntryLabel());
6312 // Go to next interface if the classes do not match.
6313 __ cmpl(cls.AsRegister<CpuRegister>(),
6314 CodeGeneratorX86_64::ArrayAddress(temp,
6315 maybe_temp2_loc,
6316 TIMES_4,
6317 object_array_data_offset));
6318       __ j(kNotEqual, &start_loop);  // Fall through when a match is found.
6319 // If `cls` was poisoned above, unpoison it.
6320 __ MaybeUnpoisonHeapReference(cls.AsRegister<CpuRegister>());
6321 break;
6322 }
6323
6324 if (done.IsLinked()) {
6325 __ Bind(&done);
6326 }
6327
6328 __ Bind(type_check_slow_path->GetExitLabel());
6329 }
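
// Illustrative sketch of the kInterfaceCheck loop above (pseudo-code). The
// iftable is laid out as (interface class, method array) pairs, which is why
// the counter steps by 2 and only the even slots are compared:
//
//   iftable = obj->klass_->iftable_;
//   for (int32_t i = iftable->length_ - 2; i >= 0; i -= 2) {
//     if (iftable[i] == cls) goto done;  // The interface is implemented.
//   }
//   type_check_slow_path();              // Throws ClassCastException.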
6330
VisitMonitorOperation(HMonitorOperation * instruction)6331 void LocationsBuilderX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
6332 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6333 instruction, LocationSummary::kCallOnMainOnly);
6334 InvokeRuntimeCallingConvention calling_convention;
6335 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6336 }
6337
VisitMonitorOperation(HMonitorOperation * instruction)6338 void InstructionCodeGeneratorX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
6339 codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
6340 instruction,
6341 instruction->GetDexPc());
6342 if (instruction->IsEnter()) {
6343 CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
6344 } else {
6345 CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
6346 }
6347 }
6348
VisitAnd(HAnd * instruction)6349 void LocationsBuilderX86_64::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
VisitOr(HOr * instruction)6350 void LocationsBuilderX86_64::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); }
VisitXor(HXor * instruction)6351 void LocationsBuilderX86_64::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); }
6352
HandleBitwiseOperation(HBinaryOperation * instruction)6353 void LocationsBuilderX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
6354 LocationSummary* locations =
6355 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
6356 DCHECK(instruction->GetResultType() == DataType::Type::kInt32
6357 || instruction->GetResultType() == DataType::Type::kInt64);
6358 locations->SetInAt(0, Location::RequiresRegister());
6359 locations->SetInAt(1, Location::Any());
6360 locations->SetOut(Location::SameAsFirstInput());
6361 }
6362
VisitAnd(HAnd * instruction)6363 void InstructionCodeGeneratorX86_64::VisitAnd(HAnd* instruction) {
6364 HandleBitwiseOperation(instruction);
6365 }
6366
VisitOr(HOr * instruction)6367 void InstructionCodeGeneratorX86_64::VisitOr(HOr* instruction) {
6368 HandleBitwiseOperation(instruction);
6369 }
6370
VisitXor(HXor * instruction)6371 void InstructionCodeGeneratorX86_64::VisitXor(HXor* instruction) {
6372 HandleBitwiseOperation(instruction);
6373 }
6374
HandleBitwiseOperation(HBinaryOperation * instruction)6375 void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
6376 LocationSummary* locations = instruction->GetLocations();
6377 Location first = locations->InAt(0);
6378 Location second = locations->InAt(1);
6379 DCHECK(first.Equals(locations->Out()));
6380
6381 if (instruction->GetResultType() == DataType::Type::kInt32) {
6382 if (second.IsRegister()) {
6383 if (instruction->IsAnd()) {
6384 __ andl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
6385 } else if (instruction->IsOr()) {
6386 __ orl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
6387 } else {
6388 DCHECK(instruction->IsXor());
6389 __ xorl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
6390 }
6391 } else if (second.IsConstant()) {
6392 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
6393 if (instruction->IsAnd()) {
6394 __ andl(first.AsRegister<CpuRegister>(), imm);
6395 } else if (instruction->IsOr()) {
6396 __ orl(first.AsRegister<CpuRegister>(), imm);
6397 } else {
6398 DCHECK(instruction->IsXor());
6399 __ xorl(first.AsRegister<CpuRegister>(), imm);
6400 }
6401 } else {
6402 Address address(CpuRegister(RSP), second.GetStackIndex());
6403 if (instruction->IsAnd()) {
6404 __ andl(first.AsRegister<CpuRegister>(), address);
6405 } else if (instruction->IsOr()) {
6406 __ orl(first.AsRegister<CpuRegister>(), address);
6407 } else {
6408 DCHECK(instruction->IsXor());
6409 __ xorl(first.AsRegister<CpuRegister>(), address);
6410 }
6411 }
6412 } else {
6413 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
6414 CpuRegister first_reg = first.AsRegister<CpuRegister>();
6415 bool second_is_constant = false;
6416 int64_t value = 0;
6417 if (second.IsConstant()) {
6418 second_is_constant = true;
6419 value = second.GetConstant()->AsLongConstant()->GetValue();
6420 }
6421 bool is_int32_value = IsInt<32>(value);
6422
6423 if (instruction->IsAnd()) {
6424 if (second_is_constant) {
6425 if (is_int32_value) {
6426 __ andq(first_reg, Immediate(static_cast<int32_t>(value)));
6427 } else {
6428 __ andq(first_reg, codegen_->LiteralInt64Address(value));
6429 }
6430 } else if (second.IsDoubleStackSlot()) {
6431 __ andq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
6432 } else {
6433 __ andq(first_reg, second.AsRegister<CpuRegister>());
6434 }
6435 } else if (instruction->IsOr()) {
6436 if (second_is_constant) {
6437 if (is_int32_value) {
6438 __ orq(first_reg, Immediate(static_cast<int32_t>(value)));
6439 } else {
6440 __ orq(first_reg, codegen_->LiteralInt64Address(value));
6441 }
6442 } else if (second.IsDoubleStackSlot()) {
6443 __ orq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
6444 } else {
6445 __ orq(first_reg, second.AsRegister<CpuRegister>());
6446 }
6447 } else {
6448 DCHECK(instruction->IsXor());
6449 if (second_is_constant) {
6450 if (is_int32_value) {
6451 __ xorq(first_reg, Immediate(static_cast<int32_t>(value)));
6452 } else {
6453 __ xorq(first_reg, codegen_->LiteralInt64Address(value));
6454 }
6455 } else if (second.IsDoubleStackSlot()) {
6456 __ xorq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
6457 } else {
6458 __ xorq(first_reg, second.AsRegister<CpuRegister>());
6459 }
6460 }
6461 }
6462 }
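
// Illustrative examples of the 64-bit operand forms selected above
// (pseudo-code; values are samples):
//
//   first &= 0x7f;                                // Constant fits in int32: andq reg, imm32.
//   first &= *LiteralInt64Address(0x123456789);   // 64-bit constant from the constant area.
//   first &= *(rsp + stack_index);                // Operand spilled to a double stack slot.
//   first &= second_reg;                          // Both operands in registers.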
6463
GenerateReferenceLoadOneRegister(HInstruction * instruction,Location out,uint32_t offset,Location maybe_temp,ReadBarrierOption read_barrier_option)6464 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(
6465 HInstruction* instruction,
6466 Location out,
6467 uint32_t offset,
6468 Location maybe_temp,
6469 ReadBarrierOption read_barrier_option) {
6470 CpuRegister out_reg = out.AsRegister<CpuRegister>();
6471 if (read_barrier_option == kWithReadBarrier) {
6472 CHECK(kEmitCompilerReadBarrier);
6473 if (kUseBakerReadBarrier) {
6474 // Load with fast path based Baker's read barrier.
6475 // /* HeapReference<Object> */ out = *(out + offset)
6476 codegen_->GenerateFieldLoadWithBakerReadBarrier(
6477 instruction, out, out_reg, offset, /* needs_null_check */ false);
6478 } else {
6479 // Load with slow path based read barrier.
6480 // Save the value of `out` into `maybe_temp` before overwriting it
6481 // in the following move operation, as we will need it for the
6482 // read barrier below.
6483 DCHECK(maybe_temp.IsRegister()) << maybe_temp;
6484 __ movl(maybe_temp.AsRegister<CpuRegister>(), out_reg);
6485 // /* HeapReference<Object> */ out = *(out + offset)
6486 __ movl(out_reg, Address(out_reg, offset));
6487 codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
6488 }
6489 } else {
6490 // Plain load with no read barrier.
6491 // /* HeapReference<Object> */ out = *(out + offset)
6492 __ movl(out_reg, Address(out_reg, offset));
6493 __ MaybeUnpoisonHeapReference(out_reg);
6494 }
6495 }
6496
GenerateReferenceLoadTwoRegisters(HInstruction * instruction,Location out,Location obj,uint32_t offset,ReadBarrierOption read_barrier_option)6497 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(
6498 HInstruction* instruction,
6499 Location out,
6500 Location obj,
6501 uint32_t offset,
6502 ReadBarrierOption read_barrier_option) {
6503 CpuRegister out_reg = out.AsRegister<CpuRegister>();
6504 CpuRegister obj_reg = obj.AsRegister<CpuRegister>();
6505 if (read_barrier_option == kWithReadBarrier) {
6506 CHECK(kEmitCompilerReadBarrier);
6507 if (kUseBakerReadBarrier) {
6508 // Load with fast path based Baker's read barrier.
6509 // /* HeapReference<Object> */ out = *(obj + offset)
6510 codegen_->GenerateFieldLoadWithBakerReadBarrier(
6511 instruction, out, obj_reg, offset, /* needs_null_check */ false);
6512 } else {
6513 // Load with slow path based read barrier.
6514 // /* HeapReference<Object> */ out = *(obj + offset)
6515 __ movl(out_reg, Address(obj_reg, offset));
6516 codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
6517 }
6518 } else {
6519 // Plain load with no read barrier.
6520 // /* HeapReference<Object> */ out = *(obj + offset)
6521 __ movl(out_reg, Address(obj_reg, offset));
6522 __ MaybeUnpoisonHeapReference(out_reg);
6523 }
6524 }
6525
GenerateGcRootFieldLoad(HInstruction * instruction,Location root,const Address & address,Label * fixup_label,ReadBarrierOption read_barrier_option)6526 void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(
6527 HInstruction* instruction,
6528 Location root,
6529 const Address& address,
6530 Label* fixup_label,
6531 ReadBarrierOption read_barrier_option) {
6532 CpuRegister root_reg = root.AsRegister<CpuRegister>();
6533 if (read_barrier_option == kWithReadBarrier) {
6534 DCHECK(kEmitCompilerReadBarrier);
6535 if (kUseBakerReadBarrier) {
6536 // Fast path implementation of art::ReadBarrier::BarrierForRoot when
6537 // Baker's read barrier are used:
6538 //
6539 // root = obj.field;
6540 // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
6541 // if (temp != null) {
6542 // root = temp(root)
6543 // }
6544
6545 // /* GcRoot<mirror::Object> */ root = *address
6546 __ movl(root_reg, address);
6547 if (fixup_label != nullptr) {
6548 __ Bind(fixup_label);
6549 }
6550 static_assert(
6551 sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
6552 "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
6553 "have different sizes.");
6554 static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
6555 "art::mirror::CompressedReference<mirror::Object> and int32_t "
6556 "have different sizes.");
6557
6558 // Slow path marking the GC root `root`.
6559 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathX86_64(
6560 instruction, root, /* unpoison_ref_before_marking */ false);
6561 codegen_->AddSlowPath(slow_path);
6562
6563 // Test the `Thread::Current()->pReadBarrierMarkReg ## root.reg()` entrypoint.
6564 const int32_t entry_point_offset =
6565 Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(root.reg());
6566 __ gs()->cmpl(Address::Absolute(entry_point_offset, /* no_rip */ true), Immediate(0));
6567 // The entrypoint is null when the GC is not marking.
6568 __ j(kNotEqual, slow_path->GetEntryLabel());
6569 __ Bind(slow_path->GetExitLabel());
6570 } else {
6571 // GC root loaded through a slow path for read barriers other
6572 // than Baker's.
6573 // /* GcRoot<mirror::Object>* */ root = address
6574 __ leaq(root_reg, address);
6575 if (fixup_label != nullptr) {
6576 __ Bind(fixup_label);
6577 }
6578 // /* mirror::Object* */ root = root->Read()
6579 codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
6580 }
6581 } else {
6582 // Plain GC root load with no read barrier.
6583 // /* GcRoot<mirror::Object> */ root = *address
6584 __ movl(root_reg, address);
6585 if (fixup_label != nullptr) {
6586 __ Bind(fixup_label);
6587 }
6588 // Note that GC roots are not affected by heap poisoning, thus we
6589 // do not have to unpoison `root_reg` here.
6590 }
6591 }
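
// Illustrative sketch of the Baker's read barrier root load emitted above
// (pseudo-code; the entrypoint slot tested depends on root.reg()):
//
//   root = *address;                                         // The root load itself.
//   if (Thread::Current()->pReadBarrierMarkReg<N> != null) { // GC is marking.
//     root = ReadBarrierMarkSlowPathX86_64(root);
//   }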
6592
GenerateFieldLoadWithBakerReadBarrier(HInstruction * instruction,Location ref,CpuRegister obj,uint32_t offset,bool needs_null_check)6593 void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
6594 Location ref,
6595 CpuRegister obj,
6596 uint32_t offset,
6597 bool needs_null_check) {
6598 DCHECK(kEmitCompilerReadBarrier);
6599 DCHECK(kUseBakerReadBarrier);
6600
6601 // /* HeapReference<Object> */ ref = *(obj + offset)
6602 Address src(obj, offset);
6603 GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
6604 }
6605
GenerateArrayLoadWithBakerReadBarrier(HInstruction * instruction,Location ref,CpuRegister obj,uint32_t data_offset,Location index,bool needs_null_check)6606 void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
6607 Location ref,
6608 CpuRegister obj,
6609 uint32_t data_offset,
6610 Location index,
6611 bool needs_null_check) {
6612 DCHECK(kEmitCompilerReadBarrier);
6613 DCHECK(kUseBakerReadBarrier);
6614
6615 static_assert(
6616 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
6617 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
6618 // /* HeapReference<Object> */ ref =
6619 // *(obj + data_offset + index * sizeof(HeapReference<Object>))
6620 Address src = CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset);
6621 GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
6622 }
6623
GenerateReferenceLoadWithBakerReadBarrier(HInstruction * instruction,Location ref,CpuRegister obj,const Address & src,bool needs_null_check,bool always_update_field,CpuRegister * temp1,CpuRegister * temp2)6624 void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
6625 Location ref,
6626 CpuRegister obj,
6627 const Address& src,
6628 bool needs_null_check,
6629 bool always_update_field,
6630 CpuRegister* temp1,
6631 CpuRegister* temp2) {
6632 DCHECK(kEmitCompilerReadBarrier);
6633 DCHECK(kUseBakerReadBarrier);
6634
6635 // In slow path based read barriers, the read barrier call is
6636 // inserted after the original load. However, in fast path based
6637 // Baker's read barriers, we need to perform the load of
6638 // mirror::Object::monitor_ *before* the original reference load.
6639 // This load-load ordering is required by the read barrier.
6640 // The fast path/slow path (for Baker's algorithm) should look like:
6641 //
6642 // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
6643 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
6644 // HeapReference<Object> ref = *src; // Original reference load.
6645 // bool is_gray = (rb_state == ReadBarrier::GrayState());
6646 // if (is_gray) {
6647 // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path.
6648 // }
6649 //
6650 // Note: the original implementation in ReadBarrier::Barrier is
6651 // slightly more complex as:
6652 // - it implements the load-load fence using a data dependency on
6653 // the high-bits of rb_state, which are expected to be all zeroes
6654 // (we use CodeGeneratorX86_64::GenerateMemoryBarrier instead
6655 // here, which is a no-op thanks to the x86-64 memory model);
6656 // - it performs additional checks that we do not do here for
6657 // performance reasons.
6658
6659 CpuRegister ref_reg = ref.AsRegister<CpuRegister>();
6660 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
6661
6662 // Given the numeric representation, it's enough to check the low bit of the rb_state.
6663 static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
6664 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
6665 constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
6666 constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
6667 constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
6668
6669 // if (rb_state == ReadBarrier::GrayState())
6670 // ref = ReadBarrier::Mark(ref);
6671 // At this point, just do the "if" and make sure that flags are preserved until the branch.
6672 __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
6673 if (needs_null_check) {
6674 MaybeRecordImplicitNullCheck(instruction);
6675 }
6676
6677 // Load fence to prevent load-load reordering.
6678 // Note that this is a no-op, thanks to the x86-64 memory model.
6679 GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
6680
6681 // The actual reference load.
6682 // /* HeapReference<Object> */ ref = *src
6683 __ movl(ref_reg, src); // Flags are unaffected.
6684
6685 // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch.
6686 // Slow path marking the object `ref` when it is gray.
6687 SlowPathCode* slow_path;
6688 if (always_update_field) {
6689 DCHECK(temp1 != nullptr);
6690 DCHECK(temp2 != nullptr);
6691 slow_path = new (GetScopedAllocator()) ReadBarrierMarkAndUpdateFieldSlowPathX86_64(
6692 instruction, ref, obj, src, /* unpoison_ref_before_marking */ true, *temp1, *temp2);
6693 } else {
6694 slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathX86_64(
6695 instruction, ref, /* unpoison_ref_before_marking */ true);
6696 }
6697 AddSlowPath(slow_path);
6698
6699 // We have done the "if" of the gray bit check above, now branch based on the flags.
6700 __ j(kNotZero, slow_path->GetEntryLabel());
6701
6702 // Object* ref = ref_addr->AsMirrorPtr()
6703 __ MaybeUnpoisonHeapReference(ref_reg);
6704
6705 __ Bind(slow_path->GetExitLabel());
6706 }
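
// Illustrative sketch of the fast path generated above (pseudo-code; the gray
// byte/bit positions come from LockWord::kReadBarrierStateShift):
//
//   bool is_gray = (obj->monitor_gray_byte & gray_bit);  // testb, flags kept live.
//   ref = *src;                                           // movl, does not touch flags.
//   if (is_gray) ref = mark_slow_path(ref);               // jnz to the mark slow path.
//   // Otherwise unpoison `ref` here when heap poisoning is enabled.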
6707
GenerateReadBarrierSlow(HInstruction * instruction,Location out,Location ref,Location obj,uint32_t offset,Location index)6708 void CodeGeneratorX86_64::GenerateReadBarrierSlow(HInstruction* instruction,
6709 Location out,
6710 Location ref,
6711 Location obj,
6712 uint32_t offset,
6713 Location index) {
6714 DCHECK(kEmitCompilerReadBarrier);
6715
6716 // Insert a slow path based read barrier *after* the reference load.
6717 //
6718 // If heap poisoning is enabled, the unpoisoning of the loaded
6719 // reference will be carried out by the runtime within the slow
6720 // path.
6721 //
6722 // Note that `ref` currently does not get unpoisoned (when heap
6723 // poisoning is enabled), which is alright as the `ref` argument is
6724 // not used by the artReadBarrierSlow entry point.
6725 //
6726 // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
6727 SlowPathCode* slow_path = new (GetScopedAllocator())
6728 ReadBarrierForHeapReferenceSlowPathX86_64(instruction, out, ref, obj, offset, index);
6729 AddSlowPath(slow_path);
6730
6731 __ jmp(slow_path->GetEntryLabel());
6732 __ Bind(slow_path->GetExitLabel());
6733 }
6734
MaybeGenerateReadBarrierSlow(HInstruction * instruction,Location out,Location ref,Location obj,uint32_t offset,Location index)6735 void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
6736 Location out,
6737 Location ref,
6738 Location obj,
6739 uint32_t offset,
6740 Location index) {
6741 if (kEmitCompilerReadBarrier) {
6742 // Baker's read barriers shall be handled by the fast path
6743 // (CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier).
6744 DCHECK(!kUseBakerReadBarrier);
6745 // If heap poisoning is enabled, unpoisoning will be taken care of
6746 // by the runtime within the slow path.
6747 GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
6748 } else if (kPoisonHeapReferences) {
6749 __ UnpoisonHeapReference(out.AsRegister<CpuRegister>());
6750 }
6751 }
6752
GenerateReadBarrierForRootSlow(HInstruction * instruction,Location out,Location root)6753 void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
6754 Location out,
6755 Location root) {
6756 DCHECK(kEmitCompilerReadBarrier);
6757
6758 // Insert a slow path based read barrier *after* the GC root load.
6759 //
6760 // Note that GC roots are not affected by heap poisoning, so we do
6761 // not need to do anything special for this here.
6762 SlowPathCode* slow_path =
6763 new (GetScopedAllocator()) ReadBarrierForRootSlowPathX86_64(instruction, out, root);
6764 AddSlowPath(slow_path);
6765
6766 __ jmp(slow_path->GetEntryLabel());
6767 __ Bind(slow_path->GetExitLabel());
6768 }
6769
VisitBoundType(HBoundType * instruction ATTRIBUTE_UNUSED)6770 void LocationsBuilderX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
6771 // Nothing to do, this should be removed during prepare for register allocator.
6772 LOG(FATAL) << "Unreachable";
6773 }
6774
VisitBoundType(HBoundType * instruction ATTRIBUTE_UNUSED)6775 void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
6776 // Nothing to do, this should be removed during prepare for register allocator.
6777 LOG(FATAL) << "Unreachable";
6778 }
6779
6780 // Simple implementation of packed switch - generate cascaded compare/jumps.
VisitPackedSwitch(HPackedSwitch * switch_instr)6781 void LocationsBuilderX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
6782 LocationSummary* locations =
6783 new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
6784 locations->SetInAt(0, Location::RequiresRegister());
6785 locations->AddTemp(Location::RequiresRegister());
6786 locations->AddTemp(Location::RequiresRegister());
6787 }
6788
VisitPackedSwitch(HPackedSwitch * switch_instr)6789 void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
6790 int32_t lower_bound = switch_instr->GetStartValue();
6791 uint32_t num_entries = switch_instr->GetNumEntries();
6792 LocationSummary* locations = switch_instr->GetLocations();
6793 CpuRegister value_reg_in = locations->InAt(0).AsRegister<CpuRegister>();
6794 CpuRegister temp_reg = locations->GetTemp(0).AsRegister<CpuRegister>();
6795 CpuRegister base_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
6796 HBasicBlock* default_block = switch_instr->GetDefaultBlock();
6797
6798 // Should we generate smaller inline compare/jumps?
6799 if (num_entries <= kPackedSwitchJumpTableThreshold) {
6800 // Figure out the correct compare values and jump conditions.
6801 // Handle the first compare/branch as a special case because it might
6802 // jump to the default case.
6803 DCHECK_GT(num_entries, 2u);
6804 Condition first_condition;
6805 uint32_t index;
6806 const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
6807 if (lower_bound != 0) {
6808 first_condition = kLess;
6809 __ cmpl(value_reg_in, Immediate(lower_bound));
6810 __ j(first_condition, codegen_->GetLabelOf(default_block));
6811 __ j(kEqual, codegen_->GetLabelOf(successors[0]));
6812
6813 index = 1;
6814 } else {
6815 // Handle all the compare/jumps below.
6816 first_condition = kBelow;
6817 index = 0;
6818 }
6819
6820 // Handle the rest of the compare/jumps.
6821 for (; index + 1 < num_entries; index += 2) {
6822 int32_t compare_to_value = lower_bound + index + 1;
6823 __ cmpl(value_reg_in, Immediate(compare_to_value));
6824 // Jump to successors[index] if value < case_value[index].
6825 __ j(first_condition, codegen_->GetLabelOf(successors[index]));
6826 // Jump to successors[index + 1] if value == case_value[index + 1].
6827 __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
6828 }
6829
6830 if (index != num_entries) {
6831 // There are an odd number of entries. Handle the last one.
6832 DCHECK_EQ(index + 1, num_entries);
6833 __ cmpl(value_reg_in, Immediate(static_cast<int32_t>(lower_bound + index)));
6834 __ j(kEqual, codegen_->GetLabelOf(successors[index]));
6835 }
6836
6837 // And the default for any other value.
6838 if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
6839 __ jmp(codegen_->GetLabelOf(default_block));
6840 }
6841 return;
6842 }

  // Remove the bias, if needed.
  Register value_reg_out = value_reg_in.AsRegister();
  if (lower_bound != 0) {
    __ leal(temp_reg, Address(value_reg_in, -lower_bound));
    value_reg_out = temp_reg.AsRegister();
  }
  CpuRegister value_reg(value_reg_out);

  // Is the value in range?
  __ cmpl(value_reg, Immediate(num_entries - 1));
  __ j(kAbove, codegen_->GetLabelOf(default_block));

  // We are in the range of the table.
  // Load the address of the jump table in the constant area.
  __ leaq(base_reg, codegen_->LiteralCaseTable(switch_instr));

  // Load the (signed) offset from the jump table.
  __ movsxd(temp_reg, Address(base_reg, value_reg, TIMES_4, 0));

  // Add the offset to the address of the table base.
  __ addq(temp_reg, base_reg);

  // And jump.
  __ jmp(temp_reg);
}
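
// For reference, a rough sketch of the sequence emitted by the jump-table path above
// (register names illustrative):
//   leal   temp, [value - lower_bound]      // only when lower_bound != 0
//   cmpl   value, num_entries - 1
//   ja     default_block
//   leaq   base, [rip + <case table in constant area>]
//   movsxd temp, dword ptr [base + value * 4]
//   addq   temp, base
//   jmp    temp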

void LocationsBuilderX86_64::VisitIntermediateAddress(HIntermediateAddress* instruction
                                                      ATTRIBUTE_UNUSED) {
  LOG(FATAL) << "Unreachable";
}

void InstructionCodeGeneratorX86_64::VisitIntermediateAddress(HIntermediateAddress* instruction
                                                              ATTRIBUTE_UNUSED) {
  LOG(FATAL) << "Unreachable";
}

void CodeGeneratorX86_64::Load32BitValue(CpuRegister dest, int32_t value) {
  if (value == 0) {
    __ xorl(dest, dest);
  } else {
    __ movl(dest, Immediate(value));
  }
}

void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) {
  if (value == 0) {
    // Clears upper bits too.
    __ xorl(dest, dest);
  } else if (IsUint<32>(value)) {
    // We can use a 32 bit move, as it will zero-extend and is shorter.
    __ movl(dest, Immediate(static_cast<int32_t>(value)));
  } else {
    __ movq(dest, Immediate(value));
  }
}
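
// Rough encoding sizes behind the choices above (illustrative, for the low eight GPRs):
// xorl reg, reg is 2 bytes, movl reg, imm32 is 5 bytes, and movq reg, imm64 is 10 bytes.
// The xorl also clears the upper 32 bits, which is why it is safe for the 64-bit zero case.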

void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, int32_t value) {
  if (value == 0) {
    __ xorps(dest, dest);
  } else {
    __ movss(dest, LiteralInt32Address(value));
  }
}

void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, int64_t value) {
  if (value == 0) {
    __ xorpd(dest, dest);
  } else {
    __ movsd(dest, LiteralInt64Address(value));
  }
}

void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, float value) {
  Load32BitValue(dest, bit_cast<int32_t, float>(value));
}

void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, double value) {
  Load64BitValue(dest, bit_cast<int64_t, double>(value));
}

void CodeGeneratorX86_64::Compare32BitValue(CpuRegister dest, int32_t value) {
  if (value == 0) {
    __ testl(dest, dest);
  } else {
    __ cmpl(dest, Immediate(value));
  }
}
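
// testl reg, reg sets ZF and SF the same way as cmpl reg, 0 but avoids encoding an
// immediate, which is why it is preferred for the zero case here and in Compare64BitValue.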

void CodeGeneratorX86_64::Compare64BitValue(CpuRegister dest, int64_t value) {
  if (IsInt<32>(value)) {
    if (value == 0) {
      __ testq(dest, dest);
    } else {
      __ cmpq(dest, Immediate(static_cast<int32_t>(value)));
    }
  } else {
    // Value won't fit in a 32-bit immediate; compare against a literal in the constant area.
    __ cmpq(dest, LiteralInt64Address(value));
  }
}

void CodeGeneratorX86_64::GenerateIntCompare(Location lhs, Location rhs) {
  CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>();
  GenerateIntCompare(lhs_reg, rhs);
}

void CodeGeneratorX86_64::GenerateIntCompare(CpuRegister lhs, Location rhs) {
  if (rhs.IsConstant()) {
    int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
    Compare32BitValue(lhs, value);
  } else if (rhs.IsStackSlot()) {
    __ cmpl(lhs, Address(CpuRegister(RSP), rhs.GetStackIndex()));
  } else {
    __ cmpl(lhs, rhs.AsRegister<CpuRegister>());
  }
}

void CodeGeneratorX86_64::GenerateLongCompare(Location lhs, Location rhs) {
  CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>();
  if (rhs.IsConstant()) {
    int64_t value = rhs.GetConstant()->AsLongConstant()->GetValue();
    Compare64BitValue(lhs_reg, value);
  } else if (rhs.IsDoubleStackSlot()) {
    __ cmpq(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
  } else {
    __ cmpq(lhs_reg, rhs.AsRegister<CpuRegister>());
  }
}

Address CodeGeneratorX86_64::ArrayAddress(CpuRegister obj,
                                          Location index,
                                          ScaleFactor scale,
                                          uint32_t data_offset) {
  return index.IsConstant() ?
      Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) :
      Address(obj, index.AsRegister<CpuRegister>(), scale, data_offset);
}
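
// Illustrative example (made-up values): with data_offset = 16 and scale = TIMES_4, a
// constant index of 3 yields Address(obj, 3 * 4 + 16), i.e. [obj + 28], while a register
// index yields [obj + index * 4 + 16].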

void CodeGeneratorX86_64::Store64BitValueToStack(Location dest, int64_t value) {
  DCHECK(dest.IsDoubleStackSlot());
  if (IsInt<32>(value)) {
    // Can move directly as an int32 constant.
    __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()),
            Immediate(static_cast<int32_t>(value)));
  } else {
    Load64BitValue(CpuRegister(TMP), value);
    __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()), CpuRegister(TMP));
  }
}

/**
 * Class to handle late fixup of offsets into the constant area.
 */
class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
 public:
  RIPFixup(CodeGeneratorX86_64& codegen, size_t offset)
      : codegen_(&codegen), offset_into_constant_area_(offset) {}

 protected:
  void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }

  CodeGeneratorX86_64* codegen_;

 private:
  void Process(const MemoryRegion& region, int pos) OVERRIDE {
    // Patch the correct offset for the instruction. We use the address of the
    // 'next' instruction, which is 'pos' (patch the 4 bytes before).
    int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
    int32_t relative_position = constant_offset - pos;

    // Patch in the right value.
    region.StoreUnaligned<int32_t>(pos - 4, relative_position);
  }

  // Location in constant area that the fixup refers to.
  size_t offset_into_constant_area_;
};
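
// Worked example with made-up numbers: if the constant area starts at code offset 0x100,
// this fixup refers to offset 0x8 within it, and the patched instruction ends at pos 0x40,
// then the 4 bytes at 0x3c are overwritten with 0x100 + 0x8 - 0x40 = 0xc8, the RIP-relative
// displacement from the next instruction to the constant.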

/**
 * Class to handle late fixup of offsets to a jump table that will be created in the
 * constant area.
 */
class JumpTableRIPFixup : public RIPFixup {
 public:
  JumpTableRIPFixup(CodeGeneratorX86_64& codegen, HPackedSwitch* switch_instr)
      : RIPFixup(codegen, -1), switch_instr_(switch_instr) {}

  void CreateJumpTable() {
    X86_64Assembler* assembler = codegen_->GetAssembler();

    // Ensure that the reference to the jump table has the correct offset.
    const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
    SetOffset(offset_in_constant_table);

    // Compute the offset from the start of the function to this jump table.
    const int32_t current_table_offset = assembler->CodeSize() + offset_in_constant_table;

    // Populate the jump table with the offsets to the case targets.
    int32_t num_entries = switch_instr_->GetNumEntries();
    HBasicBlock* block = switch_instr_->GetBlock();
    const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
    // The value that we want is the target offset - the position of the table.
    for (int32_t i = 0; i < num_entries; i++) {
      HBasicBlock* b = successors[i];
      Label* l = codegen_->GetLabelOf(b);
      DCHECK(l->IsBound());
      int32_t offset_to_block = l->Position() - current_table_offset;
      assembler->AppendInt32(offset_to_block);
    }
  }

 private:
  const HPackedSwitch* switch_instr_;
};
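
// Each AppendInt32 above emits one signed 32-bit entry holding (case target - table start).
// This matches the consumer in VisitPackedSwitch: movsxd loads the entry, addq adds the
// table base back in, and jmp lands on the case block.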

void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) {
  // Generate the constant area if needed.
  X86_64Assembler* assembler = GetAssembler();
  if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
    // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8 byte values.
    assembler->Align(4, 0);
    constant_area_start_ = assembler->CodeSize();

    // Populate any jump tables.
    for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) {
      jump_table->CreateJumpTable();
    }

    // And now add the constant area to the generated code.
    assembler->AddConstantArea();
  }

  // And finish up.
  CodeGenerator::Finalize(allocator);
}
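
// Rough resulting layout (illustrative): the method's code, padding to a 4-byte boundary,
// then the constant area (literals added during code generation followed by any jump tables
// appended above). constant_area_start_ records where that trailing data begins so that
// RIPFixup can turn constant-area offsets into RIP-relative displacements.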

Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) {
  AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddDouble(v));
  return Address::RIP(fixup);
}

Address CodeGeneratorX86_64::LiteralFloatAddress(float v) {
  AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddFloat(v));
  return Address::RIP(fixup);
}

Address CodeGeneratorX86_64::LiteralInt32Address(int32_t v) {
  AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddInt32(v));
  return Address::RIP(fixup);
}

Address CodeGeneratorX86_64::LiteralInt64Address(int64_t v) {
  AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddInt64(v));
  return Address::RIP(fixup);
}
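
// Typical use (illustrative): the returned RIP-relative Address is consumed directly as a
// memory operand, e.g. __ movsd(xmm0, LiteralDoubleAddress(3.14)), with the displacement
// resolved later by the RIPFixup once the constant area has been placed.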

// TODO: trg as memory.
void CodeGeneratorX86_64::MoveFromReturnRegister(Location trg, DataType::Type type) {
  if (!trg.IsValid()) {
    DCHECK_EQ(type, DataType::Type::kVoid);
    return;
  }

  DCHECK_NE(type, DataType::Type::kVoid);

  Location return_loc = InvokeDexCallingConventionVisitorX86_64().GetReturnLocation(type);
  if (trg.Equals(return_loc)) {
    return;
  }

  // Let the parallel move resolver take care of all of this.
  HParallelMove parallel_move(GetGraph()->GetAllocator());
  parallel_move.AddMove(return_loc, trg, type, nullptr);
  GetMoveResolver()->EmitNativeCode(&parallel_move);
}

Address CodeGeneratorX86_64::LiteralCaseTable(HPackedSwitch* switch_instr) {
  // Create a fixup to be used to create and address the jump table.
  JumpTableRIPFixup* table_fixup =
      new (GetGraph()->GetAllocator()) JumpTableRIPFixup(*this, switch_instr);

  // Remember the fixup so the table contents get populated later, in Finalize().
  fixups_to_jump_tables_.push_back(table_fixup);
  return Address::RIP(table_fixup);
}

void CodeGeneratorX86_64::MoveInt64ToAddress(const Address& addr_low,
                                             const Address& addr_high,
                                             int64_t v,
                                             HInstruction* instruction) {
  if (IsInt<32>(v)) {
    int32_t v_32 = v;
    __ movq(addr_low, Immediate(v_32));
    MaybeRecordImplicitNullCheck(instruction);
  } else {
    // Doesn't fit in a 32-bit immediate. Store it in two 32-bit pieces.
    int32_t low_v = Low32Bits(v);
    int32_t high_v = High32Bits(v);
    __ movl(addr_low, Immediate(low_v));
    MaybeRecordImplicitNullCheck(instruction);
    __ movl(addr_high, Immediate(high_v));
  }
}
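
// In both paths above, the implicit null check is recorded right after the first store,
// since that is the access that can fault if the underlying object reference is null.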

void CodeGeneratorX86_64::PatchJitRootUse(uint8_t* code,
                                          const uint8_t* roots_data,
                                          const PatchInfo<Label>& info,
                                          uint64_t index_in_table) const {
  uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
  uintptr_t address =
      reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
  typedef __attribute__((__aligned__(1))) uint32_t unaligned_uint32_t;
  reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
      dchecked_integral_cast<uint32_t>(address);
}
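
// The 4 bytes patched here become the (32-bit) address of the root's slot in the JIT roots
// table, so the generated load reads the current String/Class reference through that table;
// dchecked_integral_cast verifies the address actually fits in 32 bits.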

void CodeGeneratorX86_64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
  for (const PatchInfo<Label>& info : jit_string_patches_) {
    StringReference string_reference(info.target_dex_file, dex::StringIndex(info.offset_or_index));
    uint64_t index_in_table = GetJitStringRootIndex(string_reference);
    PatchJitRootUse(code, roots_data, info, index_in_table);
  }

  for (const PatchInfo<Label>& info : jit_class_patches_) {
    TypeReference type_reference(info.target_dex_file, dex::TypeIndex(info.offset_or_index));
    uint64_t index_in_table = GetJitClassRootIndex(type_reference);
    PatchJitRootUse(code, roots_data, info, index_in_table);
  }
}

#undef __

}  // namespace x86_64
}  // namespace art