1 /*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "code_generator_x86_64.h"
18
19 #include "art_method-inl.h"
20 #include "class_table.h"
21 #include "code_generator_utils.h"
22 #include "compiled_method.h"
23 #include "entrypoints/quick/quick_entrypoints.h"
24 #include "gc/accounting/card_table.h"
25 #include "gc/space/image_space.h"
26 #include "heap_poisoning.h"
27 #include "intrinsics.h"
28 #include "intrinsics_x86_64.h"
29 #include "jit/profiling_info.h"
30 #include "linker/linker_patch.h"
31 #include "lock_word.h"
32 #include "mirror/array-inl.h"
33 #include "mirror/class-inl.h"
34 #include "mirror/object_reference.h"
35 #include "scoped_thread_state_change-inl.h"
36 #include "thread.h"
37 #include "utils/assembler.h"
38 #include "utils/stack_checks.h"
39 #include "utils/x86_64/assembler_x86_64.h"
40 #include "utils/x86_64/managed_register_x86_64.h"
41
42 namespace art {
43
44 template<class MirrorType>
45 class GcRoot;
46
47 namespace x86_64 {
48
49 static constexpr int kCurrentMethodStackOffset = 0;
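// The managed calling convention passes the current ArtMethod* in RDI.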
50 static constexpr Register kMethodRegisterArgument = RDI;
51 // The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump
52 // table version generates 7 instructions and num_entries literals. The compare/jump sequence
53 // generates less code/data when num_entries is small.
54 static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;
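// For example, with 4 entries the compare/jump sequence costs roughly 1.5 * 4 = 6
// instructions, while a jump table needs 7 instructions plus 4 table entries, so
// small switches are cheaper as compare/jump and larger ones as a jump table.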
55
56 static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 };
57 static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 };
58
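// Bit 10 of the x87 FPU status word is the C2 condition flag, which FPREM sets
// while the partial remainder computation is still incomplete.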
59 static constexpr int kC2ConditionMask = 0x400;
60
OneRegInReferenceOutSaveEverythingCallerSaves()61 static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
62 // Custom calling convention: RAX serves as both input and output.
63 RegisterSet caller_saves = RegisterSet::Empty();
64 caller_saves.Add(Location::RegisterLocation(RAX));
65 return caller_saves;
66 }
67
68 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
69 #define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())-> // NOLINT
70 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize, x).Int32Value()
71
72 class NullCheckSlowPathX86_64 : public SlowPathCode {
73 public:
74 explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : SlowPathCode(instruction) {}
75
76 void EmitNativeCode(CodeGenerator* codegen) override {
77 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
78 __ Bind(GetEntryLabel());
79 if (instruction_->CanThrowIntoCatchBlock()) {
80 // Live registers will be restored in the catch block if caught.
81 SaveLiveRegisters(codegen, instruction_->GetLocations());
82 }
83 x86_64_codegen->InvokeRuntime(kQuickThrowNullPointer,
84 instruction_,
85 instruction_->GetDexPc(),
86 this);
87 CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
88 }
89
90 bool IsFatal() const override { return true; }
91
92 const char* GetDescription() const override { return "NullCheckSlowPathX86_64"; }
93
94 private:
95 DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86_64);
96 };
97
98 class DivZeroCheckSlowPathX86_64 : public SlowPathCode {
99 public:
100 explicit DivZeroCheckSlowPathX86_64(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}
101
102 void EmitNativeCode(CodeGenerator* codegen) override {
103 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
104 __ Bind(GetEntryLabel());
105 x86_64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
106 CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
107 }
108
109 bool IsFatal() const override { return true; }
110
111 const char* GetDescription() const override { return "DivZeroCheckSlowPathX86_64"; }
112
113 private:
114 DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86_64);
115 };
116
117 class DivRemMinusOneSlowPathX86_64 : public SlowPathCode {
118 public:
119 DivRemMinusOneSlowPathX86_64(HInstruction* at, Register reg, DataType::Type type, bool is_div)
120 : SlowPathCode(at), cpu_reg_(CpuRegister(reg)), type_(type), is_div_(is_div) {}
121
122 void EmitNativeCode(CodeGenerator* codegen) override {
123 __ Bind(GetEntryLabel());
124 if (type_ == DataType::Type::kInt32) {
125 if (is_div_) {
126 __ negl(cpu_reg_);
127 } else {
128 __ xorl(cpu_reg_, cpu_reg_);
129 }
130
131 } else {
132 DCHECK_EQ(DataType::Type::kInt64, type_);
133 if (is_div_) {
134 __ negq(cpu_reg_);
135 } else {
136 __ xorl(cpu_reg_, cpu_reg_);
137 }
138 }
139 __ jmp(GetExitLabel());
140 }
141
142 const char* GetDescription() const override { return "DivRemMinusOneSlowPathX86_64"; }
143
144 private:
145 const CpuRegister cpu_reg_;
146 const DataType::Type type_;
147 const bool is_div_;
148 DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86_64);
149 };
150
151 class SuspendCheckSlowPathX86_64 : public SlowPathCode {
152 public:
153 SuspendCheckSlowPathX86_64(HSuspendCheck* instruction, HBasicBlock* successor)
154 : SlowPathCode(instruction), successor_(successor) {}
155
156 void EmitNativeCode(CodeGenerator* codegen) override {
157 LocationSummary* locations = instruction_->GetLocations();
158 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
159 __ Bind(GetEntryLabel());
160 SaveLiveRegisters(codegen, locations); // Only saves full width XMM for SIMD.
161 x86_64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
162 CheckEntrypointTypes<kQuickTestSuspend, void, void>();
163 RestoreLiveRegisters(codegen, locations); // Only restores full width XMM for SIMD.
164 if (successor_ == nullptr) {
165 __ jmp(GetReturnLabel());
166 } else {
167 __ jmp(x86_64_codegen->GetLabelOf(successor_));
168 }
169 }
170
171 Label* GetReturnLabel() {
172 DCHECK(successor_ == nullptr);
173 return &return_label_;
174 }
175
176 HBasicBlock* GetSuccessor() const {
177 return successor_;
178 }
179
180 const char* GetDescription() const override { return "SuspendCheckSlowPathX86_64"; }
181
182 private:
183 HBasicBlock* const successor_;
184 Label return_label_;
185
186 DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86_64);
187 };
188
189 class BoundsCheckSlowPathX86_64 : public SlowPathCode {
190 public:
191 explicit BoundsCheckSlowPathX86_64(HBoundsCheck* instruction)
192 : SlowPathCode(instruction) {}
193
194 void EmitNativeCode(CodeGenerator* codegen) override {
195 LocationSummary* locations = instruction_->GetLocations();
196 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
197 __ Bind(GetEntryLabel());
198 if (instruction_->CanThrowIntoCatchBlock()) {
199 // Live registers will be restored in the catch block if caught.
200 SaveLiveRegisters(codegen, instruction_->GetLocations());
201 }
202 // Are we using an array length from memory?
203 HInstruction* array_length = instruction_->InputAt(1);
204 Location length_loc = locations->InAt(1);
205 InvokeRuntimeCallingConvention calling_convention;
206 if (array_length->IsArrayLength() && array_length->IsEmittedAtUseSite()) {
207 // Load the array length into our temporary.
208 HArrayLength* length = array_length->AsArrayLength();
209 uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(length);
210 Location array_loc = array_length->GetLocations()->InAt(0);
211 Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
212 length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
213 // Check for conflicts with index.
214 if (length_loc.Equals(locations->InAt(0))) {
215 // We know we aren't using parameter 2.
216 length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2));
217 }
218 __ movl(length_loc.AsRegister<CpuRegister>(), array_len);
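// With string compression, the count field stores the length in its upper 31 bits
// and the compression flag in bit 0, so shift the flag out to obtain the length.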
219 if (mirror::kUseStringCompression && length->IsStringLength()) {
220 __ shrl(length_loc.AsRegister<CpuRegister>(), Immediate(1));
221 }
222 }
223
224 // We're moving two locations to locations that could overlap, so we need a parallel
225 // move resolver.
226 codegen->EmitParallelMoves(
227 locations->InAt(0),
228 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
229 DataType::Type::kInt32,
230 length_loc,
231 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
232 DataType::Type::kInt32);
233 QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
234 ? kQuickThrowStringBounds
235 : kQuickThrowArrayBounds;
236 x86_64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
237 CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
238 CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
239 }
240
241 bool IsFatal() const override { return true; }
242
243 const char* GetDescription() const override { return "BoundsCheckSlowPathX86_64"; }
244
245 private:
246 DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86_64);
247 };
248
249 class LoadClassSlowPathX86_64 : public SlowPathCode {
250 public:
251 LoadClassSlowPathX86_64(HLoadClass* cls, HInstruction* at)
252 : SlowPathCode(at), cls_(cls) {
253 DCHECK(at->IsLoadClass() || at->IsClinitCheck());
254 DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
255 }
256
257 void EmitNativeCode(CodeGenerator* codegen) override {
258 LocationSummary* locations = instruction_->GetLocations();
259 Location out = locations->Out();
260 const uint32_t dex_pc = instruction_->GetDexPc();
261 bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
262 bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();
263
264 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
265 __ Bind(GetEntryLabel());
266 SaveLiveRegisters(codegen, locations);
267
268 // Custom calling convention: RAX serves as both input and output.
269 if (must_resolve_type) {
270 DCHECK(IsSameDexFile(cls_->GetDexFile(), x86_64_codegen->GetGraph()->GetDexFile()));
271 dex::TypeIndex type_index = cls_->GetTypeIndex();
272 __ movl(CpuRegister(RAX), Immediate(type_index.index_));
273 x86_64_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
274 CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
275 // If we also must_do_clinit, the resolved type is now in the correct register.
276 } else {
277 DCHECK(must_do_clinit);
278 Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
279 x86_64_codegen->Move(Location::RegisterLocation(RAX), source);
280 }
281 if (must_do_clinit) {
282 x86_64_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
283 CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
284 }
285
286 // Move the class to the desired location.
287 if (out.IsValid()) {
288 DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
289 x86_64_codegen->Move(out, Location::RegisterLocation(RAX));
290 }
291
292 RestoreLiveRegisters(codegen, locations);
293 __ jmp(GetExitLabel());
294 }
295
296 const char* GetDescription() const override { return "LoadClassSlowPathX86_64"; }
297
298 private:
299 // The class this slow path will load.
300 HLoadClass* const cls_;
301
302 DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86_64);
303 };
304
305 class LoadStringSlowPathX86_64 : public SlowPathCode {
306 public:
307 explicit LoadStringSlowPathX86_64(HLoadString* instruction) : SlowPathCode(instruction) {}
308
309 void EmitNativeCode(CodeGenerator* codegen) override {
310 LocationSummary* locations = instruction_->GetLocations();
311 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
312
313 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
314 __ Bind(GetEntryLabel());
315 SaveLiveRegisters(codegen, locations);
316
317 const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
318 // Custom calling convention: RAX serves as both input and output.
319 __ movl(CpuRegister(RAX), Immediate(string_index.index_));
320 x86_64_codegen->InvokeRuntime(kQuickResolveString,
321 instruction_,
322 instruction_->GetDexPc(),
323 this);
324 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
325 x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
326 RestoreLiveRegisters(codegen, locations);
327
328 __ jmp(GetExitLabel());
329 }
330
331 const char* GetDescription() const override { return "LoadStringSlowPathX86_64"; }
332
333 private:
334 DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86_64);
335 };
336
337 class TypeCheckSlowPathX86_64 : public SlowPathCode {
338 public:
339 TypeCheckSlowPathX86_64(HInstruction* instruction, bool is_fatal)
340 : SlowPathCode(instruction), is_fatal_(is_fatal) {}
341
342 void EmitNativeCode(CodeGenerator* codegen) override {
343 LocationSummary* locations = instruction_->GetLocations();
344 uint32_t dex_pc = instruction_->GetDexPc();
345 DCHECK(instruction_->IsCheckCast()
346 || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
347
348 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
349 __ Bind(GetEntryLabel());
350
351 if (kPoisonHeapReferences &&
352 instruction_->IsCheckCast() &&
353 instruction_->AsCheckCast()->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) {
354 // First, unpoison the `cls` reference that was poisoned for direct memory comparison.
355 __ UnpoisonHeapReference(locations->InAt(1).AsRegister<CpuRegister>());
356 }
357
358 if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
359 SaveLiveRegisters(codegen, locations);
360 }
361
362 // We're moving two locations to locations that could overlap, so we need a parallel
363 // move resolver.
364 InvokeRuntimeCallingConvention calling_convention;
365 codegen->EmitParallelMoves(locations->InAt(0),
366 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
367 DataType::Type::kReference,
368 locations->InAt(1),
369 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
370 DataType::Type::kReference);
371 if (instruction_->IsInstanceOf()) {
372 x86_64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
373 CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
374 } else {
375 DCHECK(instruction_->IsCheckCast());
376 x86_64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
377 CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
378 }
379
380 if (!is_fatal_) {
381 if (instruction_->IsInstanceOf()) {
382 x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
383 }
384
385 RestoreLiveRegisters(codegen, locations);
386 __ jmp(GetExitLabel());
387 }
388 }
389
390 const char* GetDescription() const override { return "TypeCheckSlowPathX86_64"; }
391
392 bool IsFatal() const override { return is_fatal_; }
393
394 private:
395 const bool is_fatal_;
396
397 DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86_64);
398 };
399
400 class DeoptimizationSlowPathX86_64 : public SlowPathCode {
401 public:
402 explicit DeoptimizationSlowPathX86_64(HDeoptimize* instruction)
403 : SlowPathCode(instruction) {}
404
405 void EmitNativeCode(CodeGenerator* codegen) override {
406 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
407 __ Bind(GetEntryLabel());
408 LocationSummary* locations = instruction_->GetLocations();
409 SaveLiveRegisters(codegen, locations);
410 InvokeRuntimeCallingConvention calling_convention;
411 x86_64_codegen->Load32BitValue(
412 CpuRegister(calling_convention.GetRegisterAt(0)),
413 static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
414 x86_64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
415 CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
416 }
417
418 const char* GetDescription() const override { return "DeoptimizationSlowPathX86_64"; }
419
420 private:
421 DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64);
422 };
423
424 class ArraySetSlowPathX86_64 : public SlowPathCode {
425 public:
426 explicit ArraySetSlowPathX86_64(HInstruction* instruction) : SlowPathCode(instruction) {}
427
428 void EmitNativeCode(CodeGenerator* codegen) override {
429 LocationSummary* locations = instruction_->GetLocations();
430 __ Bind(GetEntryLabel());
431 SaveLiveRegisters(codegen, locations);
432
433 InvokeRuntimeCallingConvention calling_convention;
434 HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
435 parallel_move.AddMove(
436 locations->InAt(0),
437 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
438 DataType::Type::kReference,
439 nullptr);
440 parallel_move.AddMove(
441 locations->InAt(1),
442 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
443 DataType::Type::kInt32,
444 nullptr);
445 parallel_move.AddMove(
446 locations->InAt(2),
447 Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
448 DataType::Type::kReference,
449 nullptr);
450 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
451
452 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
453 x86_64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
454 CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
455 RestoreLiveRegisters(codegen, locations);
456 __ jmp(GetExitLabel());
457 }
458
459 const char* GetDescription() const override { return "ArraySetSlowPathX86_64"; }
460
461 private:
462 DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64);
463 };
464
465 // Slow path marking an object reference `ref` during a read
466 // barrier. The field `obj.field` in the object `obj` holding this
467 // reference does not get updated by this slow path after marking (see
468 // ReadBarrierMarkAndUpdateFieldSlowPathX86_64 below for that).
469 //
470 // This means that after the execution of this slow path, `ref` will
471 // always be up-to-date, but `obj.field` may not; i.e., after the
472 // flip, `ref` will be a to-space reference, but `obj.field` will
473 // probably still be a from-space reference (unless it gets updated by
474 // another thread, or if another thread installed another object
475 // reference (different from `ref`) in `obj.field`).
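// For example, if `obj.field` initially holds a from-space reference F whose
// to-space copy is T, then after this slow path `ref` equals T while `obj.field`
// may still contain F until something else updates it.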
476 class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
477 public:
478 ReadBarrierMarkSlowPathX86_64(HInstruction* instruction,
479 Location ref,
480 bool unpoison_ref_before_marking)
481 : SlowPathCode(instruction),
482 ref_(ref),
483 unpoison_ref_before_marking_(unpoison_ref_before_marking) {
484 DCHECK(kEmitCompilerReadBarrier);
485 }
486
487 const char* GetDescription() const override { return "ReadBarrierMarkSlowPathX86_64"; }
488
489 void EmitNativeCode(CodeGenerator* codegen) override {
490 LocationSummary* locations = instruction_->GetLocations();
491 CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
492 Register ref_reg = ref_cpu_reg.AsRegister();
493 DCHECK(locations->CanCall());
494 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
495 DCHECK(instruction_->IsInstanceFieldGet() ||
496 instruction_->IsStaticFieldGet() ||
497 instruction_->IsArrayGet() ||
498 instruction_->IsArraySet() ||
499 instruction_->IsLoadClass() ||
500 instruction_->IsLoadString() ||
501 instruction_->IsInstanceOf() ||
502 instruction_->IsCheckCast() ||
503 (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
504 (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
505 << "Unexpected instruction in read barrier marking slow path: "
506 << instruction_->DebugName();
507
508 __ Bind(GetEntryLabel());
509 if (unpoison_ref_before_marking_) {
510 // Object* ref = ref_addr->AsMirrorPtr()
511 __ MaybeUnpoisonHeapReference(ref_cpu_reg);
512 }
513 // No need to save live registers; it's taken care of by the
514 // entrypoint. Also, there is no need to update the stack mask,
515 // as this runtime call will not trigger a garbage collection.
516 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
517 DCHECK_NE(ref_reg, RSP);
518 DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
519 // "Compact" slow path, saving two moves.
520 //
521 // Instead of using the standard runtime calling convention (input
522 // in RDI and output in RAX):
523 //
524 // RDI <- ref
525 // RAX <- ReadBarrierMark(RDI)
526 // ref <- RAX
527 //
528 // we just use rX (the register containing `ref`) as input and output
529 // of a dedicated entrypoint:
530 //
531 // rX <- ReadBarrierMarkRegX(rX)
532 //
533 int32_t entry_point_offset =
534 Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
535 // This runtime call does not require a stack map.
536 x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
537 __ jmp(GetExitLabel());
538 }
539
540 private:
541 // The location (register) of the marked object reference.
542 const Location ref_;
543 // Should the reference in `ref_` be unpoisoned prior to marking it?
544 const bool unpoison_ref_before_marking_;
545
546 DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64);
547 };
548
549 // Slow path marking an object reference `ref` during a read barrier,
550 // and if needed, atomically updating the field `obj.field` in the
551 // object `obj` holding this reference after marking (contrary to
552 // ReadBarrierMarkSlowPathX86_64 above, which never tries to update
553 // `obj.field`).
554 //
555 // This means that after the execution of this slow path, both `ref`
556 // and `obj.field` will be up-to-date; i.e., after the flip, both will
557 // hold the same to-space reference (unless another thread installed
558 // another object reference (different from `ref`) in `obj.field`).
559 class ReadBarrierMarkAndUpdateFieldSlowPathX86_64 : public SlowPathCode {
560 public:
561 ReadBarrierMarkAndUpdateFieldSlowPathX86_64(HInstruction* instruction,
562 Location ref,
563 CpuRegister obj,
564 const Address& field_addr,
565 bool unpoison_ref_before_marking,
566 CpuRegister temp1,
567 CpuRegister temp2)
568 : SlowPathCode(instruction),
569 ref_(ref),
570 obj_(obj),
571 field_addr_(field_addr),
572 unpoison_ref_before_marking_(unpoison_ref_before_marking),
573 temp1_(temp1),
574 temp2_(temp2) {
575 DCHECK(kEmitCompilerReadBarrier);
576 }
577
578 const char* GetDescription() const override {
579 return "ReadBarrierMarkAndUpdateFieldSlowPathX86_64";
580 }
581
582 void EmitNativeCode(CodeGenerator* codegen) override {
583 LocationSummary* locations = instruction_->GetLocations();
584 CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
585 Register ref_reg = ref_cpu_reg.AsRegister();
586 DCHECK(locations->CanCall());
587 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
588 // This slow path is only used by the UnsafeCASObject intrinsic.
589 DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
590 << "Unexpected instruction in read barrier marking and field updating slow path: "
591 << instruction_->DebugName();
592 DCHECK(instruction_->GetLocations()->Intrinsified());
593 DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
594
595 __ Bind(GetEntryLabel());
596 if (unpoison_ref_before_marking_) {
597 // Object* ref = ref_addr->AsMirrorPtr()
598 __ MaybeUnpoisonHeapReference(ref_cpu_reg);
599 }
600
601 // Save the old (unpoisoned) reference.
602 __ movl(temp1_, ref_cpu_reg);
603
604 // No need to save live registers; it's taken care of by the
605 // entrypoint. Also, there is no need to update the stack mask,
606 // as this runtime call will not trigger a garbage collection.
607 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
608 DCHECK_NE(ref_reg, RSP);
609 DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
610 // "Compact" slow path, saving two moves.
611 //
612 // Instead of using the standard runtime calling convention (input
613 // in RDI and output in RAX):
614 //
615 // RDI <- ref
616 // RAX <- ReadBarrierMark(RDI)
617 // ref <- RAX
618 //
619 // we just use rX (the register containing `ref`) as input and output
620 // of a dedicated entrypoint:
621 //
622 // rX <- ReadBarrierMarkRegX(rX)
623 //
624 int32_t entry_point_offset =
625 Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
626 // This runtime call does not require a stack map.
627 x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
628
629 // If the new reference is different from the old reference,
630 // update the field in the holder (`*field_addr`).
631 //
632 // Note that this field could also hold a different object, if
633 // another thread had concurrently changed it. In that case, the
634 // LOCK CMPXCHGL instruction in the compare-and-set (CAS)
635 // operation below would abort the CAS, leaving the field as-is.
636 NearLabel done;
637 __ cmpl(temp1_, ref_cpu_reg);
638 __ j(kEqual, &done);
639
640 // Update the holder's field atomically. This may fail if the
641 // mutator updates the field before us, but that is OK. This is achieved
642 // using a strong compare-and-set (CAS) operation with relaxed
643 // memory synchronization ordering, where the expected value is
644 // the old reference and the desired value is the new reference.
645 // This operation is implemented with a 32-bit LOCK CMPXCHGL
646 // instruction, which requires the expected value (the old
647 // reference) to be in EAX. Save RAX beforehand, and move the
648 // expected value (stored in `temp1_`) into EAX.
649 __ movq(temp2_, CpuRegister(RAX));
650 __ movl(CpuRegister(RAX), temp1_);
651
652 // Convenience aliases.
653 CpuRegister base = obj_;
654 CpuRegister expected = CpuRegister(RAX);
655 CpuRegister value = ref_cpu_reg;
656
657 bool base_equals_value = (base.AsRegister() == value.AsRegister());
658 Register value_reg = ref_reg;
659 if (kPoisonHeapReferences) {
660 if (base_equals_value) {
661 // If `base` and `value` are the same register location, move
662 // `value_reg` to a temporary register. This way, poisoning
663 // `value_reg` won't invalidate `base`.
664 value_reg = temp1_.AsRegister();
665 __ movl(CpuRegister(value_reg), base);
666 }
667
668 // Check that the register allocator did not assign the location
669 // of `expected` (RAX) to `value` nor to `base`, so that heap
670 // poisoning (when enabled) works as intended below.
671 // - If `value` were equal to `expected`, both references would
672 // be poisoned twice, meaning they would not be poisoned at
673 // all, as heap poisoning uses address negation.
674 // - If `base` were equal to `expected`, poisoning `expected`
675 // would invalidate `base`.
676 DCHECK_NE(value_reg, expected.AsRegister());
677 DCHECK_NE(base.AsRegister(), expected.AsRegister());
678
679 __ PoisonHeapReference(expected);
680 __ PoisonHeapReference(CpuRegister(value_reg));
681 }
682
683 __ LockCmpxchgl(field_addr_, CpuRegister(value_reg));
684
685 // If heap poisoning is enabled, we need to unpoison the values
686 // that were poisoned earlier.
687 if (kPoisonHeapReferences) {
688 if (base_equals_value) {
689 // `value_reg` has been moved to a temporary register, no need
690 // to unpoison it.
691 } else {
692 __ UnpoisonHeapReference(CpuRegister(value_reg));
693 }
694 // No need to unpoison `expected` (RAX), as it will be overwritten below.
695 }
696
697 // Restore RAX.
698 __ movq(CpuRegister(RAX), temp2_);
699
700 __ Bind(&done);
701 __ jmp(GetExitLabel());
702 }
703
704 private:
705 // The location (register) of the marked object reference.
706 const Location ref_;
707 // The register containing the object holding the marked object reference field.
708 const CpuRegister obj_;
709 // The address of the marked reference field. The base of this address must be `obj_`.
710 const Address field_addr_;
711
712 // Should the reference in `ref_` be unpoisoned prior to marking it?
713 const bool unpoison_ref_before_marking_;
714
715 const CpuRegister temp1_;
716 const CpuRegister temp2_;
717
718 DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86_64);
719 };
720
721 // Slow path generating a read barrier for a heap reference.
722 class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
723 public:
724 ReadBarrierForHeapReferenceSlowPathX86_64(HInstruction* instruction,
725 Location out,
726 Location ref,
727 Location obj,
728 uint32_t offset,
729 Location index)
730 : SlowPathCode(instruction),
731 out_(out),
732 ref_(ref),
733 obj_(obj),
734 offset_(offset),
735 index_(index) {
736 DCHECK(kEmitCompilerReadBarrier);
737 // If `obj` is equal to `out` or `ref`, it means the initial
738 // object has been overwritten by (or after) the heap object
739 // reference load to be instrumented, e.g.:
740 //
741 // __ movl(out, Address(out, offset));
742 // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
743 //
744 // In that case, we have lost the information about the original
745 // object, and the emitted read barrier cannot work properly.
746 DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
747 DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
748 }
749
750 void EmitNativeCode(CodeGenerator* codegen) override {
751 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
752 LocationSummary* locations = instruction_->GetLocations();
753 CpuRegister reg_out = out_.AsRegister<CpuRegister>();
754 DCHECK(locations->CanCall());
755 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_;
756 DCHECK(instruction_->IsInstanceFieldGet() ||
757 instruction_->IsStaticFieldGet() ||
758 instruction_->IsArrayGet() ||
759 instruction_->IsInstanceOf() ||
760 instruction_->IsCheckCast() ||
761 (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
762 << "Unexpected instruction in read barrier for heap reference slow path: "
763 << instruction_->DebugName();
764
765 __ Bind(GetEntryLabel());
766 SaveLiveRegisters(codegen, locations);
767
768 // We may have to change the index's value, but as `index_` is a
769 // constant member (like other "inputs" of this slow path),
770 // introduce a copy of it, `index`.
771 Location index = index_;
772 if (index_.IsValid()) {
773 // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
774 if (instruction_->IsArrayGet()) {
775 // Compute real offset and store it in index_.
776 Register index_reg = index_.AsRegister<CpuRegister>().AsRegister();
777 DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
778 if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
779 // We are about to change the value of `index_reg` (see the
780 // calls to art::x86_64::X86_64Assembler::shll and
781 // art::x86_64::X86_64Assembler::AddImmediate below), but it
782 // has not been saved by the previous call to
783 // art::SlowPathCode::SaveLiveRegisters, as it is a
784 // callee-save register --
785 // art::SlowPathCode::SaveLiveRegisters does not consider
786 // callee-save registers, as it has been designed with the
787 // assumption that callee-save registers are supposed to be
788 // handled by the called function. So, as a callee-save
789 // register, `index_reg` _would_ eventually be saved onto
790 // the stack, but it would be too late: we would have
791 // changed its value earlier. Therefore, we manually save
792 // it here into another freely available register,
793 // `free_reg`, chosen of course among the caller-save
794 // registers (as a callee-save `free_reg` register would
795 // exhibit the same problem).
796 //
797 // Note we could have requested a temporary register from
798 // the register allocator instead; but we prefer not to, as
799 // this is a slow path, and we know we can find a
800 // caller-save register that is available.
801 Register free_reg = FindAvailableCallerSaveRegister(codegen).AsRegister();
802 __ movl(CpuRegister(free_reg), CpuRegister(index_reg));
803 index_reg = free_reg;
804 index = Location::RegisterLocation(index_reg);
805 } else {
806 // The initial register stored in `index_` has already been
807 // saved in the call to art::SlowPathCode::SaveLiveRegisters
808 // (as it is not a callee-save register), so we can freely
809 // use it.
810 }
811 // Shifting the index value contained in `index_reg` by the
812 // scale factor (2) cannot overflow in practice, as the
813 // runtime is unable to allocate object arrays with a size
814 // larger than 2^26 - 1 (that is, 2^28 - 4 bytes).
815 __ shll(CpuRegister(index_reg), Immediate(TIMES_4));
816 static_assert(
817 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
818 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
819 __ AddImmediate(CpuRegister(index_reg), Immediate(offset_));
820 } else {
821 // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
822 // intrinsics, `index_` is not shifted by a scale factor of 2
823 // (as in the case of ArrayGet), as it is actually an offset
824 // to an object field within an object.
825 DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
826 DCHECK(instruction_->GetLocations()->Intrinsified());
827 DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
828 (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
829 << instruction_->AsInvoke()->GetIntrinsic();
830 DCHECK_EQ(offset_, 0U);
831 DCHECK(index_.IsRegister());
832 }
833 }
834
835 // We're moving two or three locations to locations that could
836 // overlap, so we need a parallel move resolver.
837 InvokeRuntimeCallingConvention calling_convention;
838 HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
839 parallel_move.AddMove(ref_,
840 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
841 DataType::Type::kReference,
842 nullptr);
843 parallel_move.AddMove(obj_,
844 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
845 DataType::Type::kReference,
846 nullptr);
847 if (index.IsValid()) {
848 parallel_move.AddMove(index,
849 Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
850 DataType::Type::kInt32,
851 nullptr);
852 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
853 } else {
854 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
855 __ movl(CpuRegister(calling_convention.GetRegisterAt(2)), Immediate(offset_));
856 }
857 x86_64_codegen->InvokeRuntime(kQuickReadBarrierSlow,
858 instruction_,
859 instruction_->GetDexPc(),
860 this);
861 CheckEntrypointTypes<
862 kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
863 x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
864
865 RestoreLiveRegisters(codegen, locations);
866 __ jmp(GetExitLabel());
867 }
868
869 const char* GetDescription() const override {
870 return "ReadBarrierForHeapReferenceSlowPathX86_64";
871 }
872
873 private:
874 CpuRegister FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
875 size_t ref = static_cast<int>(ref_.AsRegister<CpuRegister>().AsRegister());
876 size_t obj = static_cast<int>(obj_.AsRegister<CpuRegister>().AsRegister());
877 for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
878 if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
879 return static_cast<CpuRegister>(i);
880 }
881 }
882 // We shall never fail to find a free caller-save register, as
883 // there are more than two core caller-save registers on x86-64
884 // (meaning it is possible to find one which is different from
885 // `ref` and `obj`).
886 DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
887 LOG(FATAL) << "Could not find a free caller-save register";
888 UNREACHABLE();
889 }
890
891 const Location out_;
892 const Location ref_;
893 const Location obj_;
894 const uint32_t offset_;
895 // An additional location containing an index to an array.
896 // Only used for HArrayGet and the UnsafeGetObject &
897 // UnsafeGetObjectVolatile intrinsics.
898 const Location index_;
899
900 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86_64);
901 };
902
903 // Slow path generating a read barrier for a GC root.
904 class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode {
905 public:
906 ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root)
907 : SlowPathCode(instruction), out_(out), root_(root) {
908 DCHECK(kEmitCompilerReadBarrier);
909 }
910
911 void EmitNativeCode(CodeGenerator* codegen) override {
912 LocationSummary* locations = instruction_->GetLocations();
913 DCHECK(locations->CanCall());
914 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
915 DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
916 << "Unexpected instruction in read barrier for GC root slow path: "
917 << instruction_->DebugName();
918
919 __ Bind(GetEntryLabel());
920 SaveLiveRegisters(codegen, locations);
921
922 InvokeRuntimeCallingConvention calling_convention;
923 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
924 x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
925 x86_64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
926 instruction_,
927 instruction_->GetDexPc(),
928 this);
929 CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
930 x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
931
932 RestoreLiveRegisters(codegen, locations);
933 __ jmp(GetExitLabel());
934 }
935
936 const char* GetDescription() const override { return "ReadBarrierForRootSlowPathX86_64"; }
937
938 private:
939 const Location out_;
940 const Location root_;
941
942 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86_64);
943 };
944
945 #undef __
946 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
947 #define __ down_cast<X86_64Assembler*>(GetAssembler())-> // NOLINT
948
949 inline Condition X86_64IntegerCondition(IfCondition cond) {
950 switch (cond) {
951 case kCondEQ: return kEqual;
952 case kCondNE: return kNotEqual;
953 case kCondLT: return kLess;
954 case kCondLE: return kLessEqual;
955 case kCondGT: return kGreater;
956 case kCondGE: return kGreaterEqual;
957 case kCondB: return kBelow;
958 case kCondBE: return kBelowEqual;
959 case kCondA: return kAbove;
960 case kCondAE: return kAboveEqual;
961 }
962 LOG(FATAL) << "Unreachable";
963 UNREACHABLE();
964 }
965
966 // Maps FP condition to x86_64 name.
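// Note that the unsigned-style conditions (below/above) are used because
// UCOMISS/UCOMISD report their result in CF/ZF rather than SF/OF.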
967 inline Condition X86_64FPCondition(IfCondition cond) {
968 switch (cond) {
969 case kCondEQ: return kEqual;
970 case kCondNE: return kNotEqual;
971 case kCondLT: return kBelow;
972 case kCondLE: return kBelowEqual;
973 case kCondGT: return kAbove;
974 case kCondGE: return kAboveEqual;
975 default: break; // should not happen
976 }
977 LOG(FATAL) << "Unreachable";
978 UNREACHABLE();
979 }
980
981 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStaticOrDirectDispatch(
982 const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
983 ArtMethod* method ATTRIBUTE_UNUSED) {
984 return desired_dispatch_info;
985 }
986
987 void CodeGeneratorX86_64::GenerateStaticOrDirectCall(
988 HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
989 // All registers are assumed to be correctly set up.
990
991 Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp.
992 switch (invoke->GetMethodLoadKind()) {
993 case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
994 // temp = thread->string_init_entrypoint
995 uint32_t offset =
996 GetThreadOffset<kX86_64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
997 __ gs()->movq(temp.AsRegister<CpuRegister>(), Address::Absolute(offset, /* no_rip= */ true));
998 break;
999 }
1000 case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
1001 callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
1002 break;
1003 case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative:
1004 DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
1005 __ leal(temp.AsRegister<CpuRegister>(),
1006 Address::Absolute(kDummy32BitOffset, /* no_rip= */ false));
1007 RecordBootImageMethodPatch(invoke);
1008 break;
1009 case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: {
1010 // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load.
1011 __ movl(temp.AsRegister<CpuRegister>(),
1012 Address::Absolute(kDummy32BitOffset, /* no_rip= */ false));
1013 RecordBootImageRelRoPatch(GetBootImageOffset(invoke));
1014 break;
1015 }
1016 case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
1017 __ movq(temp.AsRegister<CpuRegister>(),
1018 Address::Absolute(kDummy32BitOffset, /* no_rip= */ false));
1019 RecordMethodBssEntryPatch(invoke);
1020 // No need for memory fence, thanks to the x86-64 memory model.
1021 break;
1022 }
1023 case HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress:
1024 Load64BitValue(temp.AsRegister<CpuRegister>(), invoke->GetMethodAddress());
1025 break;
1026 case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: {
1027 GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
1028 return; // No code pointer retrieval; the runtime performs the call directly.
1029 }
1030 }
1031
1032 switch (invoke->GetCodePtrLocation()) {
1033 case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
1034 __ call(&frame_entry_label_);
1035 break;
1036 case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
1037 // (callee_method + offset_of_quick_compiled_code)()
1038 __ call(Address(callee_method.AsRegister<CpuRegister>(),
1039 ArtMethod::EntryPointFromQuickCompiledCodeOffset(
1040 kX86_64PointerSize).SizeValue()));
1041 break;
1042 }
1043 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
1044
1045 DCHECK(!IsLeafMethod());
1046 }
1047
1048 void CodeGeneratorX86_64::GenerateVirtualCall(
1049 HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
1050 CpuRegister temp = temp_in.AsRegister<CpuRegister>();
1051 size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
1052 invoke->GetVTableIndex(), kX86_64PointerSize).SizeValue();
1053
1054 // Use the calling convention instead of the location of the receiver, as
1055 // intrinsics may have put the receiver in a different register. In the intrinsics
1056 // slow path, the arguments have been moved to the right place, so here we are
1057 // guaranteed that the receiver is the first register of the calling convention.
1058 InvokeDexCallingConvention calling_convention;
1059 Register receiver = calling_convention.GetRegisterAt(0);
1060
1061 size_t class_offset = mirror::Object::ClassOffset().SizeValue();
1062 // /* HeapReference<Class> */ temp = receiver->klass_
1063 __ movl(temp, Address(CpuRegister(receiver), class_offset));
1064 MaybeRecordImplicitNullCheck(invoke);
1065 // Instead of simply (possibly) unpoisoning `temp` here, we should
1066 // emit a read barrier for the previous class reference load.
1067 // However this is not required in practice, as this is an
1068 // intermediate/temporary reference and because the current
1069 // concurrent copying collector keeps the from-space memory
1070 // intact/accessible until the end of the marking phase (though
1071 // the concurrent copying collector may not do so in the future).
1072 __ MaybeUnpoisonHeapReference(temp);
1073
1074 MaybeGenerateInlineCacheCheck(invoke, temp);
1075
1076 // temp = temp->GetMethodAt(method_offset);
1077 __ movq(temp, Address(temp, method_offset));
1078 // call temp->GetEntryPoint();
1079 __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(
1080 kX86_64PointerSize).SizeValue()));
1081 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
1082 }
1083
1084 void CodeGeneratorX86_64::RecordBootImageIntrinsicPatch(uint32_t intrinsic_data) {
1085 boot_image_other_patches_.emplace_back(/* target_dex_file= */ nullptr, intrinsic_data);
1086 __ Bind(&boot_image_other_patches_.back().label);
1087 }
1088
1089 void CodeGeneratorX86_64::RecordBootImageRelRoPatch(uint32_t boot_image_offset) {
1090 boot_image_other_patches_.emplace_back(/* target_dex_file= */ nullptr, boot_image_offset);
1091 __ Bind(&boot_image_other_patches_.back().label);
1092 }
1093
1094 void CodeGeneratorX86_64::RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke) {
1095 boot_image_method_patches_.emplace_back(
1096 invoke->GetTargetMethod().dex_file, invoke->GetTargetMethod().index);
1097 __ Bind(&boot_image_method_patches_.back().label);
1098 }
1099
1100 void CodeGeneratorX86_64::RecordMethodBssEntryPatch(HInvokeStaticOrDirect* invoke) {
1101 method_bss_entry_patches_.emplace_back(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex());
1102 __ Bind(&method_bss_entry_patches_.back().label);
1103 }
1104
1105 void CodeGeneratorX86_64::RecordBootImageTypePatch(HLoadClass* load_class) {
1106 boot_image_type_patches_.emplace_back(
1107 &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
1108 __ Bind(&boot_image_type_patches_.back().label);
1109 }
1110
1111 Label* CodeGeneratorX86_64::NewTypeBssEntryPatch(HLoadClass* load_class) {
1112 type_bss_entry_patches_.emplace_back(
1113 &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
1114 return &type_bss_entry_patches_.back().label;
1115 }
1116
1117 void CodeGeneratorX86_64::RecordBootImageStringPatch(HLoadString* load_string) {
1118 boot_image_string_patches_.emplace_back(
1119 &load_string->GetDexFile(), load_string->GetStringIndex().index_);
1120 __ Bind(&boot_image_string_patches_.back().label);
1121 }
1122
1123 Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) {
1124 string_bss_entry_patches_.emplace_back(
1125 &load_string->GetDexFile(), load_string->GetStringIndex().index_);
1126 return &string_bss_entry_patches_.back().label;
1127 }
1128
1129 void CodeGeneratorX86_64::LoadBootImageAddress(CpuRegister reg, uint32_t boot_image_reference) {
1130 if (GetCompilerOptions().IsBootImage()) {
1131 __ leal(reg, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false));
1132 RecordBootImageIntrinsicPatch(boot_image_reference);
1133 } else if (GetCompilerOptions().GetCompilePic()) {
1134 __ movl(reg, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false));
1135 RecordBootImageRelRoPatch(boot_image_reference);
1136 } else {
1137 DCHECK(Runtime::Current()->UseJitCompilation());
1138 gc::Heap* heap = Runtime::Current()->GetHeap();
1139 DCHECK(!heap->GetBootImageSpaces().empty());
1140 const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference;
1141 __ movl(reg, Immediate(dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(address))));
1142 }
1143 }
1144
1145 void CodeGeneratorX86_64::AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke,
1146 uint32_t boot_image_offset) {
1147 DCHECK(invoke->IsStatic());
1148 InvokeRuntimeCallingConvention calling_convention;
1149 CpuRegister argument = CpuRegister(calling_convention.GetRegisterAt(0));
1150 if (GetCompilerOptions().IsBootImage()) {
1151 DCHECK_EQ(boot_image_offset, IntrinsicVisitor::IntegerValueOfInfo::kInvalidReference);
1152 // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
1153 __ leal(argument,
1154 Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false));
1155 MethodReference target_method = invoke->GetTargetMethod();
1156 dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
1157 boot_image_type_patches_.emplace_back(target_method.dex_file, type_idx.index_);
1158 __ Bind(&boot_image_type_patches_.back().label);
1159 } else {
1160 LoadBootImageAddress(argument, boot_image_offset);
1161 }
1162 InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
1163 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
1164 }
1165
1166 // The label points to the end of the "movl" (or another) instruction, but the literal offset
1167 // for a method patch needs to point to the embedded constant, which occupies the last 4 bytes.
1168 constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
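// For example, a RIP-relative `movl reg, [rip + disp32]` ends with the 4-byte
// disp32; since the label is bound at the end of the instruction, the literal
// starts at label.Position() - 4.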
1169
1170 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
1171 inline void CodeGeneratorX86_64::EmitPcRelativeLinkerPatches(
1172 const ArenaDeque<PatchInfo<Label>>& infos,
1173 ArenaVector<linker::LinkerPatch>* linker_patches) {
1174 for (const PatchInfo<Label>& info : infos) {
1175 uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
1176 linker_patches->push_back(
1177 Factory(literal_offset, info.target_dex_file, info.label.Position(), info.offset_or_index));
1178 }
1179 }
1180
1181 template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
1182 linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
1183 const DexFile* target_dex_file,
1184 uint32_t pc_insn_offset,
1185 uint32_t boot_image_offset) {
1186 DCHECK(target_dex_file == nullptr); // Unused for these patches, should be null.
1187 return Factory(literal_offset, pc_insn_offset, boot_image_offset);
1188 }
1189
1190 void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
1191 DCHECK(linker_patches->empty());
1192 size_t size =
1193 boot_image_method_patches_.size() +
1194 method_bss_entry_patches_.size() +
1195 boot_image_type_patches_.size() +
1196 type_bss_entry_patches_.size() +
1197 boot_image_string_patches_.size() +
1198 string_bss_entry_patches_.size() +
1199 boot_image_other_patches_.size();
1200 linker_patches->reserve(size);
1201 if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) {
1202 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
1203 boot_image_method_patches_, linker_patches);
1204 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
1205 boot_image_type_patches_, linker_patches);
1206 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
1207 boot_image_string_patches_, linker_patches);
1208 } else {
1209 DCHECK(boot_image_method_patches_.empty());
1210 DCHECK(boot_image_type_patches_.empty());
1211 DCHECK(boot_image_string_patches_.empty());
1212 }
1213 if (GetCompilerOptions().IsBootImage()) {
1214 EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
1215 boot_image_other_patches_, linker_patches);
1216 } else {
1217 EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>(
1218 boot_image_other_patches_, linker_patches);
1219 }
1220 EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
1221 method_bss_entry_patches_, linker_patches);
1222 EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
1223 type_bss_entry_patches_, linker_patches);
1224 EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
1225 string_bss_entry_patches_, linker_patches);
1226 DCHECK_EQ(size, linker_patches->size());
1227 }
1228
1229 void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const {
1230 stream << Register(reg);
1231 }
1232
1233 void CodeGeneratorX86_64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
1234 stream << FloatRegister(reg);
1235 }
1236
1237 const X86_64InstructionSetFeatures& CodeGeneratorX86_64::GetInstructionSetFeatures() const {
1238 return *GetCompilerOptions().GetInstructionSetFeatures()->AsX86_64InstructionSetFeatures();
1239 }
1240
1241 size_t CodeGeneratorX86_64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
1242 __ movq(Address(CpuRegister(RSP), stack_index), CpuRegister(reg_id));
1243 return kX86_64WordSize;
1244 }
1245
1246 size_t CodeGeneratorX86_64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
1247 __ movq(CpuRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1248 return kX86_64WordSize;
1249 }
1250
1251 size_t CodeGeneratorX86_64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1252 if (GetGraph()->HasSIMD()) {
1253 __ movups(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
1254 } else {
1255 __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
1256 }
1257 return GetSlowPathFPWidth();
1258 }
1259
RestoreFloatingPointRegister(size_t stack_index,uint32_t reg_id)1260 size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1261 if (GetGraph()->HasSIMD()) {
1262 __ movups(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1263 } else {
1264 __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1265 }
1266 return GetSlowPathFPWidth();
1267 }
1268
InvokeRuntime(QuickEntrypointEnum entrypoint,HInstruction * instruction,uint32_t dex_pc,SlowPathCode * slow_path)1269 void CodeGeneratorX86_64::InvokeRuntime(QuickEntrypointEnum entrypoint,
1270 HInstruction* instruction,
1271 uint32_t dex_pc,
1272 SlowPathCode* slow_path) {
1273 ValidateInvokeRuntime(entrypoint, instruction, slow_path);
1274 GenerateInvokeRuntime(GetThreadOffset<kX86_64PointerSize>(entrypoint).Int32Value());
1275 if (EntrypointRequiresStackMap(entrypoint)) {
1276 RecordPcInfo(instruction, dex_pc, slow_path);
1277 }
1278 }
1279
InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,HInstruction * instruction,SlowPathCode * slow_path)1280 void CodeGeneratorX86_64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
1281 HInstruction* instruction,
1282 SlowPathCode* slow_path) {
1283 ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
1284 GenerateInvokeRuntime(entry_point_offset);
1285 }
1286
GenerateInvokeRuntime(int32_t entry_point_offset)1287 void CodeGeneratorX86_64::GenerateInvokeRuntime(int32_t entry_point_offset) {
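  // The Quick entrypoints are fields of the current Thread, which x86-64 reaches through
  // the GS segment register, so the call below uses a GS-relative absolute address
  // (hence no_rip).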
1288 __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip= */ true));
1289 }
1290
1291 static constexpr int kNumberOfCpuRegisterPairs = 0;
1292 // Use a fake return address register to mimic Quick.
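// Including it in the core spill mask makes the return-address slot pushed by the
// caller's `call` part of the frame-size and CFI bookkeeping, like a regular callee save.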
1293 static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1);
CodeGeneratorX86_64(HGraph * graph,const CompilerOptions & compiler_options,OptimizingCompilerStats * stats)1294 CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
1295 const CompilerOptions& compiler_options,
1296 OptimizingCompilerStats* stats)
1297 : CodeGenerator(graph,
1298 kNumberOfCpuRegisters,
1299 kNumberOfFloatRegisters,
1300 kNumberOfCpuRegisterPairs,
1301 ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
1302 arraysize(kCoreCalleeSaves))
1303 | (1 << kFakeReturnRegister),
1304 ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves),
1305 arraysize(kFpuCalleeSaves)),
1306 compiler_options,
1307 stats),
1308 block_labels_(nullptr),
1309 location_builder_(graph, this),
1310 instruction_visitor_(graph, this),
1311 move_resolver_(graph->GetAllocator(), this),
1312 assembler_(graph->GetAllocator()),
1313 constant_area_start_(0),
1314 boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1315 method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1316 boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1317 type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1318 boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1319 string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1320 boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1321 jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1322 jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1323 fixups_to_jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
1324 AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
1325 }
1326
InstructionCodeGeneratorX86_64(HGraph * graph,CodeGeneratorX86_64 * codegen)1327 InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph,
1328 CodeGeneratorX86_64* codegen)
1329 : InstructionCodeGenerator(graph, codegen),
1330 assembler_(codegen->GetAssembler()),
1331 codegen_(codegen) {}
1332
SetupBlockedRegisters() const1333 void CodeGeneratorX86_64::SetupBlockedRegisters() const {
1334 // Stack register is always reserved.
1335 blocked_core_registers_[RSP] = true;
1336
1337 // Block the register used as TMP.
1338 blocked_core_registers_[TMP] = true;
1339 }
1340
DWARFReg(Register reg)1341 static dwarf::Reg DWARFReg(Register reg) {
1342 return dwarf::Reg::X86_64Core(static_cast<int>(reg));
1343 }
1344
DWARFReg(FloatRegister reg)1345 static dwarf::Reg DWARFReg(FloatRegister reg) {
1346 return dwarf::Reg::X86_64Fp(static_cast<int>(reg));
1347 }
1348
MaybeIncrementHotness(bool is_frame_entry)1349 void CodeGeneratorX86_64::MaybeIncrementHotness(bool is_frame_entry) {
1350 if (GetCompilerOptions().CountHotnessInCompiledCode()) {
1351 NearLabel overflow;
1352 Register method = kMethodRegisterArgument;
1353 if (!is_frame_entry) {
1354 CHECK(RequiresCurrentMethod());
1355 method = TMP;
1356 __ movq(CpuRegister(method), Address(CpuRegister(RSP), kCurrentMethodStackOffset));
1357 }
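    // Saturate at MaxCounter(): once the counter is full, skip the increment so the
    // hotness value does not wrap around.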
1358 __ cmpw(Address(CpuRegister(method), ArtMethod::HotnessCountOffset().Int32Value()),
1359 Immediate(ArtMethod::MaxCounter()));
1360 __ j(kEqual, &overflow);
1361 __ addw(Address(CpuRegister(method), ArtMethod::HotnessCountOffset().Int32Value()),
1362 Immediate(1));
1363 __ Bind(&overflow);
1364 }
1365
1366 if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
1367 ScopedObjectAccess soa(Thread::Current());
1368 ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize);
1369 if (info != nullptr) {
1370 uint64_t address = reinterpret_cast64<uint64_t>(info);
1371 NearLabel done;
1372 __ movq(CpuRegister(TMP), Immediate(address));
1373 __ addw(Address(CpuRegister(TMP), ProfilingInfo::BaselineHotnessCountOffset().Int32Value()),
1374 Immediate(1));
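      // Incrementing a full 16-bit counter sets the carry flag; only in that case do we
      // fall through and request an optimized compilation of the method.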
1375 __ j(kCarryClear, &done);
1376 if (HasEmptyFrame()) {
1377 CHECK(is_frame_entry);
1378 // Frame alignment, and the stub expects the method on the stack.
1379 __ pushq(CpuRegister(RDI));
1380 __ cfi().AdjustCFAOffset(kX86_64WordSize);
1381 __ cfi().RelOffset(DWARFReg(RDI), 0);
1382 } else if (!RequiresCurrentMethod()) {
1383 CHECK(is_frame_entry);
1384 __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset), CpuRegister(RDI));
1385 }
1386 GenerateInvokeRuntime(
1387 GetThreadOffset<kX86_64PointerSize>(kQuickCompileOptimized).Int32Value());
1388 if (HasEmptyFrame()) {
1389 __ popq(CpuRegister(RDI));
1390 __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize));
1391 __ cfi().Restore(DWARFReg(RDI));
1392 }
1393 __ Bind(&done);
1394 }
1395 }
1396 }
1397
GenerateFrameEntry()1398 void CodeGeneratorX86_64::GenerateFrameEntry() {
1399 __ cfi().SetCurrentCFAOffset(kX86_64WordSize); // return address
1400 __ Bind(&frame_entry_label_);
1401 bool skip_overflow_check = IsLeafMethod()
1402 && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
1403 DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
1404
1406 if (!skip_overflow_check) {
1407 size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86_64);
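    // Implicit stack overflow check: probe the far end of the reserved stack region.
    // If those pages are not mapped, the load faults and the fault handler turns it
    // into a StackOverflowError.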
1408 __ testq(CpuRegister(RAX), Address(CpuRegister(RSP), -static_cast<int32_t>(reserved_bytes)));
1409 RecordPcInfo(nullptr, 0);
1410 }
1411
1412 if (!HasEmptyFrame()) {
1413 for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
1414 Register reg = kCoreCalleeSaves[i];
1415 if (allocated_registers_.ContainsCoreRegister(reg)) {
1416 __ pushq(CpuRegister(reg));
1417 __ cfi().AdjustCFAOffset(kX86_64WordSize);
1418 __ cfi().RelOffset(DWARFReg(reg), 0);
1419 }
1420 }
1421
1422 int adjust = GetFrameSize() - GetCoreSpillSize();
1423 __ subq(CpuRegister(RSP), Immediate(adjust));
1424 __ cfi().AdjustCFAOffset(adjust);
1425 uint32_t xmm_spill_location = GetFpuSpillStart();
1426 size_t xmm_spill_slot_size = GetCalleePreservedFPWidth();
1427
1428 for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
1429 if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
1430 int offset = xmm_spill_location + (xmm_spill_slot_size * i);
1431 __ movsd(Address(CpuRegister(RSP), offset), XmmRegister(kFpuCalleeSaves[i]));
1432 __ cfi().RelOffset(DWARFReg(kFpuCalleeSaves[i]), offset);
1433 }
1434 }
1435
1436 // Save the current method if we need it. Note that we do not
1437 // do this in HCurrentMethod, as the instruction might have been removed
1438 // in the SSA graph.
1439 if (RequiresCurrentMethod()) {
1440 CHECK(!HasEmptyFrame());
1441 __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset),
1442 CpuRegister(kMethodRegisterArgument));
1443 }
1444
1445 if (GetGraph()->HasShouldDeoptimizeFlag()) {
1446 CHECK(!HasEmptyFrame());
1447 // Initialize should_deoptimize flag to 0.
1448 __ movl(Address(CpuRegister(RSP), GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
1449 }
1450 }
1451
1452 MaybeIncrementHotness(/* is_frame_entry= */ true);
1453 }
1454
GenerateFrameExit()1455 void CodeGeneratorX86_64::GenerateFrameExit() {
1456 __ cfi().RememberState();
1457 if (!HasEmptyFrame()) {
1458 uint32_t xmm_spill_location = GetFpuSpillStart();
1459 size_t xmm_spill_slot_size = GetCalleePreservedFPWidth();
1460 for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
1461 if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
1462 int offset = xmm_spill_location + (xmm_spill_slot_size * i);
1463 __ movsd(XmmRegister(kFpuCalleeSaves[i]), Address(CpuRegister(RSP), offset));
1464 __ cfi().Restore(DWARFReg(kFpuCalleeSaves[i]));
1465 }
1466 }
1467
1468 int adjust = GetFrameSize() - GetCoreSpillSize();
1469 __ addq(CpuRegister(RSP), Immediate(adjust));
1470 __ cfi().AdjustCFAOffset(-adjust);
1471
1472 for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
1473 Register reg = kCoreCalleeSaves[i];
1474 if (allocated_registers_.ContainsCoreRegister(reg)) {
1475 __ popq(CpuRegister(reg));
1476 __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize));
1477 __ cfi().Restore(DWARFReg(reg));
1478 }
1479 }
1480 }
1481 __ ret();
1482 __ cfi().RestoreState();
1483 __ cfi().DefCFAOffset(GetFrameSize());
1484 }
1485
Bind(HBasicBlock * block)1486 void CodeGeneratorX86_64::Bind(HBasicBlock* block) {
1487 __ Bind(GetLabelOf(block));
1488 }
1489
Move(Location destination,Location source)1490 void CodeGeneratorX86_64::Move(Location destination, Location source) {
1491 if (source.Equals(destination)) {
1492 return;
1493 }
1494 if (destination.IsRegister()) {
1495 CpuRegister dest = destination.AsRegister<CpuRegister>();
1496 if (source.IsRegister()) {
1497 __ movq(dest, source.AsRegister<CpuRegister>());
1498 } else if (source.IsFpuRegister()) {
1499 __ movd(dest, source.AsFpuRegister<XmmRegister>());
1500 } else if (source.IsStackSlot()) {
1501 __ movl(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1502 } else if (source.IsConstant()) {
1503 HConstant* constant = source.GetConstant();
1504 if (constant->IsLongConstant()) {
1505 Load64BitValue(dest, constant->AsLongConstant()->GetValue());
1506 } else {
1507 Load32BitValue(dest, GetInt32ValueOf(constant));
1508 }
1509 } else {
1510 DCHECK(source.IsDoubleStackSlot());
1511 __ movq(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1512 }
1513 } else if (destination.IsFpuRegister()) {
1514 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
1515 if (source.IsRegister()) {
1516 __ movd(dest, source.AsRegister<CpuRegister>());
1517 } else if (source.IsFpuRegister()) {
1518 __ movaps(dest, source.AsFpuRegister<XmmRegister>());
1519 } else if (source.IsConstant()) {
1520 HConstant* constant = source.GetConstant();
1521 int64_t value = CodeGenerator::GetInt64ValueOf(constant);
1522 if (constant->IsFloatConstant()) {
1523 Load32BitValue(dest, static_cast<int32_t>(value));
1524 } else {
1525 Load64BitValue(dest, value);
1526 }
1527 } else if (source.IsStackSlot()) {
1528 __ movss(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1529 } else {
1530 DCHECK(source.IsDoubleStackSlot());
1531 __ movsd(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1532 }
1533 } else if (destination.IsStackSlot()) {
1534 if (source.IsRegister()) {
1535 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
1536 source.AsRegister<CpuRegister>());
1537 } else if (source.IsFpuRegister()) {
1538 __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
1539 source.AsFpuRegister<XmmRegister>());
1540 } else if (source.IsConstant()) {
1541 HConstant* constant = source.GetConstant();
1542 int32_t value = GetInt32ValueOf(constant);
1543 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
1544 } else {
1545 DCHECK(source.IsStackSlot()) << source;
1546 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
1547 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
1548 }
1549 } else {
1550 DCHECK(destination.IsDoubleStackSlot());
1551 if (source.IsRegister()) {
1552 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
1553 source.AsRegister<CpuRegister>());
1554 } else if (source.IsFpuRegister()) {
1555 __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
1556 source.AsFpuRegister<XmmRegister>());
1557 } else if (source.IsConstant()) {
1558 HConstant* constant = source.GetConstant();
1559 DCHECK(constant->IsLongConstant() || constant->IsDoubleConstant());
1560 int64_t value = GetInt64ValueOf(constant);
1561 Store64BitValueToStack(destination, value);
1562 } else {
1563 DCHECK(source.IsDoubleStackSlot());
1564 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
1565 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
1566 }
1567 }
1568 }
1569
MoveConstant(Location location,int32_t value)1570 void CodeGeneratorX86_64::MoveConstant(Location location, int32_t value) {
1571 DCHECK(location.IsRegister());
1572 Load64BitValue(location.AsRegister<CpuRegister>(), static_cast<int64_t>(value));
1573 }
1574
MoveLocation(Location dst,Location src,DataType::Type dst_type ATTRIBUTE_UNUSED)1575 void CodeGeneratorX86_64::MoveLocation(
1576 Location dst, Location src, DataType::Type dst_type ATTRIBUTE_UNUSED) {
1577 Move(dst, src);
1578 }
1579
AddLocationAsTemp(Location location,LocationSummary * locations)1580 void CodeGeneratorX86_64::AddLocationAsTemp(Location location, LocationSummary* locations) {
1581 if (location.IsRegister()) {
1582 locations->AddTemp(location);
1583 } else {
1584 UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
1585 }
1586 }
1587
HandleGoto(HInstruction * got,HBasicBlock * successor)1588 void InstructionCodeGeneratorX86_64::HandleGoto(HInstruction* got, HBasicBlock* successor) {
1589 if (successor->IsExitBlock()) {
1590 DCHECK(got->GetPrevious()->AlwaysThrows());
1591 return; // no code needed
1592 }
1593
1594 HBasicBlock* block = got->GetBlock();
1595 HInstruction* previous = got->GetPrevious();
1596
1597 HLoopInformation* info = block->GetLoopInformation();
1598 if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
1599 codegen_->MaybeIncrementHotness(/* is_frame_entry= */ false);
1600 GenerateSuspendCheck(info->GetSuspendCheck(), successor);
1601 return;
1602 }
1603
1604 if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
1605 GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
1606 }
1607 if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
1608 __ jmp(codegen_->GetLabelOf(successor));
1609 }
1610 }
1611
VisitGoto(HGoto * got)1612 void LocationsBuilderX86_64::VisitGoto(HGoto* got) {
1613 got->SetLocations(nullptr);
1614 }
1615
VisitGoto(HGoto * got)1616 void InstructionCodeGeneratorX86_64::VisitGoto(HGoto* got) {
1617 HandleGoto(got, got->GetSuccessor());
1618 }
1619
VisitTryBoundary(HTryBoundary * try_boundary)1620 void LocationsBuilderX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
1621 try_boundary->SetLocations(nullptr);
1622 }
1623
VisitTryBoundary(HTryBoundary * try_boundary)1624 void InstructionCodeGeneratorX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
1625 HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
1626 if (!successor->IsExitBlock()) {
1627 HandleGoto(try_boundary, successor);
1628 }
1629 }
1630
VisitExit(HExit * exit)1631 void LocationsBuilderX86_64::VisitExit(HExit* exit) {
1632 exit->SetLocations(nullptr);
1633 }
1634
VisitExit(HExit * exit ATTRIBUTE_UNUSED)1635 void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
1636 }
1637
1638 template<class LabelType>
GenerateFPJumps(HCondition * cond,LabelType * true_label,LabelType * false_label)1639 void InstructionCodeGeneratorX86_64::GenerateFPJumps(HCondition* cond,
1640 LabelType* true_label,
1641 LabelType* false_label) {
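  // ucomiss/ucomisd signal an unordered result (a NaN operand) through the parity flag.
  // Dispatch that case first, according to the condition's NaN bias, before testing the
  // ordered condition.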
1642 if (cond->IsFPConditionTrueIfNaN()) {
1643 __ j(kUnordered, true_label);
1644 } else if (cond->IsFPConditionFalseIfNaN()) {
1645 __ j(kUnordered, false_label);
1646 }
1647 __ j(X86_64FPCondition(cond->GetCondition()), true_label);
1648 }
1649
GenerateCompareTest(HCondition * condition)1650 void InstructionCodeGeneratorX86_64::GenerateCompareTest(HCondition* condition) {
1651 LocationSummary* locations = condition->GetLocations();
1652
1653 Location left = locations->InAt(0);
1654 Location right = locations->InAt(1);
1655 DataType::Type type = condition->InputAt(0)->GetType();
1656 switch (type) {
1657 case DataType::Type::kBool:
1658 case DataType::Type::kUint8:
1659 case DataType::Type::kInt8:
1660 case DataType::Type::kUint16:
1661 case DataType::Type::kInt16:
1662 case DataType::Type::kInt32:
1663 case DataType::Type::kReference: {
1664 codegen_->GenerateIntCompare(left, right);
1665 break;
1666 }
1667 case DataType::Type::kInt64: {
1668 codegen_->GenerateLongCompare(left, right);
1669 break;
1670 }
1671 case DataType::Type::kFloat32: {
1672 if (right.IsFpuRegister()) {
1673 __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
1674 } else if (right.IsConstant()) {
1675 __ ucomiss(left.AsFpuRegister<XmmRegister>(),
1676 codegen_->LiteralFloatAddress(
1677 right.GetConstant()->AsFloatConstant()->GetValue()));
1678 } else {
1679 DCHECK(right.IsStackSlot());
1680 __ ucomiss(left.AsFpuRegister<XmmRegister>(),
1681 Address(CpuRegister(RSP), right.GetStackIndex()));
1682 }
1683 break;
1684 }
1685 case DataType::Type::kFloat64: {
1686 if (right.IsFpuRegister()) {
1687 __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
1688 } else if (right.IsConstant()) {
1689 __ ucomisd(left.AsFpuRegister<XmmRegister>(),
1690 codegen_->LiteralDoubleAddress(
1691 right.GetConstant()->AsDoubleConstant()->GetValue()));
1692 } else {
1693 DCHECK(right.IsDoubleStackSlot());
1694 __ ucomisd(left.AsFpuRegister<XmmRegister>(),
1695 Address(CpuRegister(RSP), right.GetStackIndex()));
1696 }
1697 break;
1698 }
1699 default:
1700 LOG(FATAL) << "Unexpected condition type " << type;
1701 }
1702 }
1703
1704 template<class LabelType>
GenerateCompareTestAndBranch(HCondition * condition,LabelType * true_target_in,LabelType * false_target_in)1705 void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* condition,
1706 LabelType* true_target_in,
1707 LabelType* false_target_in) {
1708 // Generated branching requires both targets to be explicit. If either of the
1709 // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead.
1710 LabelType fallthrough_target;
1711 LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
1712 LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
1713
1714 // Generate the comparison to set the CC.
1715 GenerateCompareTest(condition);
1716
1717 // Now generate the correct jump(s).
1718 DataType::Type type = condition->InputAt(0)->GetType();
1719 switch (type) {
1720 case DataType::Type::kInt64: {
1721 __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
1722 break;
1723 }
1724 case DataType::Type::kFloat32: {
1725 GenerateFPJumps(condition, true_target, false_target);
1726 break;
1727 }
1728 case DataType::Type::kFloat64: {
1729 GenerateFPJumps(condition, true_target, false_target);
1730 break;
1731 }
1732 default:
1733 LOG(FATAL) << "Unexpected condition type " << type;
1734 }
1735
1736 if (false_target != &fallthrough_target) {
1737 __ jmp(false_target);
1738 }
1739
1740 if (fallthrough_target.IsLinked()) {
1741 __ Bind(&fallthrough_target);
1742 }
1743 }
1744
AreEflagsSetFrom(HInstruction * cond,HInstruction * branch)1745 static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) {
1746 // Moves may affect the eflags register (a move of zero uses xorl), so the flags can only
1747 // be reused when the condition is generated immediately before `branch`. Materialized
1748 // floating-point conditions cannot be reused either, due to their more complex branching.
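// For example, a materialized integer HLessThan immediately followed by the HIf that
// consumes it: setcc does not touch the flags, so the branch can reuse the flags left
// by the cmp instead of re-testing the materialized boolean.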
1749 return cond->IsCondition() &&
1750 cond->GetNext() == branch &&
1751 !DataType::IsFloatingPointType(cond->InputAt(0)->GetType());
1752 }
1753
1754 template<class LabelType>
GenerateTestAndBranch(HInstruction * instruction,size_t condition_input_index,LabelType * true_target,LabelType * false_target)1755 void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruction,
1756 size_t condition_input_index,
1757 LabelType* true_target,
1758 LabelType* false_target) {
1759 HInstruction* cond = instruction->InputAt(condition_input_index);
1760
1761 if (true_target == nullptr && false_target == nullptr) {
1762 // Nothing to do. The code always falls through.
1763 return;
1764 } else if (cond->IsIntConstant()) {
1765 // Constant condition, statically compared against "true" (integer value 1).
1766 if (cond->AsIntConstant()->IsTrue()) {
1767 if (true_target != nullptr) {
1768 __ jmp(true_target);
1769 }
1770 } else {
1771 DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
1772 if (false_target != nullptr) {
1773 __ jmp(false_target);
1774 }
1775 }
1776 return;
1777 }
1778
1779 // The following code generates these patterns:
1780 // (1) true_target == nullptr && false_target != nullptr
1781 // - opposite condition true => branch to false_target
1782 // (2) true_target != nullptr && false_target == nullptr
1783 // - condition true => branch to true_target
1784 // (3) true_target != nullptr && false_target != nullptr
1785 // - condition true => branch to true_target
1786 // - branch to false_target
1787 if (IsBooleanValueOrMaterializedCondition(cond)) {
1788 if (AreEflagsSetFrom(cond, instruction)) {
1789 if (true_target == nullptr) {
1790 __ j(X86_64IntegerCondition(cond->AsCondition()->GetOppositeCondition()), false_target);
1791 } else {
1792 __ j(X86_64IntegerCondition(cond->AsCondition()->GetCondition()), true_target);
1793 }
1794 } else {
1795 // Materialized condition, compare against 0.
1796 Location lhs = instruction->GetLocations()->InAt(condition_input_index);
1797 if (lhs.IsRegister()) {
1798 __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
1799 } else {
1800 __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0));
1801 }
1802 if (true_target == nullptr) {
1803 __ j(kEqual, false_target);
1804 } else {
1805 __ j(kNotEqual, true_target);
1806 }
1807 }
1808 } else {
1809 // Condition has not been materialized, use its inputs as the
1810 // comparison and its condition as the branch condition.
1811 HCondition* condition = cond->AsCondition();
1812
1813 // If this is a long or FP comparison that has been folded into
1814 // the HCondition, generate the comparison directly.
1815 DataType::Type type = condition->InputAt(0)->GetType();
1816 if (type == DataType::Type::kInt64 || DataType::IsFloatingPointType(type)) {
1817 GenerateCompareTestAndBranch(condition, true_target, false_target);
1818 return;
1819 }
1820
1821 Location lhs = condition->GetLocations()->InAt(0);
1822 Location rhs = condition->GetLocations()->InAt(1);
1823 codegen_->GenerateIntCompare(lhs, rhs);
1824 if (true_target == nullptr) {
1825 __ j(X86_64IntegerCondition(condition->GetOppositeCondition()), false_target);
1826 } else {
1827 __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
1828 }
1829 }
1830
1831 // If neither branch falls through (case 3), the conditional branch to `true_target`
1832 // was already emitted (case 2) and we need to emit a jump to `false_target`.
1833 if (true_target != nullptr && false_target != nullptr) {
1834 __ jmp(false_target);
1835 }
1836 }
1837
VisitIf(HIf * if_instr)1838 void LocationsBuilderX86_64::VisitIf(HIf* if_instr) {
1839 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
1840 if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
1841 locations->SetInAt(0, Location::Any());
1842 }
1843 }
1844
VisitIf(HIf * if_instr)1845 void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
1846 HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
1847 HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
1848 Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
1849 nullptr : codegen_->GetLabelOf(true_successor);
1850 Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
1851 nullptr : codegen_->GetLabelOf(false_successor);
1852 GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
1853 }
1854
VisitDeoptimize(HDeoptimize * deoptimize)1855 void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
1856 LocationSummary* locations = new (GetGraph()->GetAllocator())
1857 LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
1858 InvokeRuntimeCallingConvention calling_convention;
1859 RegisterSet caller_saves = RegisterSet::Empty();
1860 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1861 locations->SetCustomSlowPathCallerSaves(caller_saves);
1862 if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
1863 locations->SetInAt(0, Location::Any());
1864 }
1865 }
1866
VisitDeoptimize(HDeoptimize * deoptimize)1867 void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
1868 SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86_64>(deoptimize);
1869 GenerateTestAndBranch<Label>(deoptimize,
1870 /* condition_input_index= */ 0,
1871 slow_path->GetEntryLabel(),
1872 /* false_target= */ nullptr);
1873 }
1874
VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag * flag)1875 void LocationsBuilderX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
1876 LocationSummary* locations = new (GetGraph()->GetAllocator())
1877 LocationSummary(flag, LocationSummary::kNoCall);
1878 locations->SetOut(Location::RequiresRegister());
1879 }
1880
VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag * flag)1881 void InstructionCodeGeneratorX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
1882 __ movl(flag->GetLocations()->Out().AsRegister<CpuRegister>(),
1883 Address(CpuRegister(RSP), codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
1884 }
1885
SelectCanUseCMOV(HSelect * select)1886 static bool SelectCanUseCMOV(HSelect* select) {
1887 // There are no conditional move instructions for XMMs.
1888 if (DataType::IsFloatingPointType(select->GetType())) {
1889 return false;
1890 }
1891
1892 // An FP condition doesn't generate the single condition code that we need.
1893 HInstruction* condition = select->GetCondition();
1894 if (condition->IsCondition() &&
1895 DataType::IsFloatingPointType(condition->InputAt(0)->GetType())) {
1896 return false;
1897 }
1898
1899 // We can generate a CMOV for this Select.
1900 return true;
1901 }
1902
VisitSelect(HSelect * select)1903 void LocationsBuilderX86_64::VisitSelect(HSelect* select) {
1904 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select);
1905 if (DataType::IsFloatingPointType(select->GetType())) {
1906 locations->SetInAt(0, Location::RequiresFpuRegister());
1907 locations->SetInAt(1, Location::Any());
1908 } else {
1909 locations->SetInAt(0, Location::RequiresRegister());
1910 if (SelectCanUseCMOV(select)) {
1911 if (select->InputAt(1)->IsConstant()) {
1912 locations->SetInAt(1, Location::RequiresRegister());
1913 } else {
1914 locations->SetInAt(1, Location::Any());
1915 }
1916 } else {
1917 locations->SetInAt(1, Location::Any());
1918 }
1919 }
1920 if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
1921 locations->SetInAt(2, Location::RequiresRegister());
1922 }
1923 locations->SetOut(Location::SameAsFirstInput());
1924 }
1925
VisitSelect(HSelect * select)1926 void InstructionCodeGeneratorX86_64::VisitSelect(HSelect* select) {
1927 LocationSummary* locations = select->GetLocations();
1928 if (SelectCanUseCMOV(select)) {
1929 // If both the condition and the source types are integer, we can generate
1930 // a CMOV to implement Select.
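    // The emitted pattern is roughly: the output already holds the false value (same-as-
    // first-input), a test/cmp establishes the flags, and a cmovcc overwrites the output
    // with the true value only when the condition holds.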
1931 CpuRegister value_false = locations->InAt(0).AsRegister<CpuRegister>();
1932 Location value_true_loc = locations->InAt(1);
1933 DCHECK(locations->InAt(0).Equals(locations->Out()));
1934
1935 HInstruction* select_condition = select->GetCondition();
1936 Condition cond = kNotEqual;
1937
1938 // Figure out how to test the 'condition'.
1939 if (select_condition->IsCondition()) {
1940 HCondition* condition = select_condition->AsCondition();
1941 if (!condition->IsEmittedAtUseSite()) {
1942 // This was a previously materialized condition.
1943 // Can we use the existing condition code?
1944 if (AreEflagsSetFrom(condition, select)) {
1945 // Materialization was the previous instruction. Condition codes are right.
1946 cond = X86_64IntegerCondition(condition->GetCondition());
1947 } else {
1948 // No, we have to recreate the condition code.
1949 CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
1950 __ testl(cond_reg, cond_reg);
1951 }
1952 } else {
1953 GenerateCompareTest(condition);
1954 cond = X86_64IntegerCondition(condition->GetCondition());
1955 }
1956 } else {
1957 // Must be a Boolean condition, which needs to be compared to 0.
1958 CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
1959 __ testl(cond_reg, cond_reg);
1960 }
1961
1962 // If the condition is true, overwrite the output, which already contains false.
1963 // Generate the correct sized CMOV.
1964 bool is_64_bit = DataType::Is64BitType(select->GetType());
1965 if (value_true_loc.IsRegister()) {
1966 __ cmov(cond, value_false, value_true_loc.AsRegister<CpuRegister>(), is_64_bit);
1967 } else {
1968 __ cmov(cond,
1969 value_false,
1970 Address(CpuRegister(RSP), value_true_loc.GetStackIndex()), is_64_bit);
1971 }
1972 } else {
1973 NearLabel false_target;
1974 GenerateTestAndBranch<NearLabel>(select,
1975 /* condition_input_index= */ 2,
1976 /* true_target= */ nullptr,
1977 &false_target);
1978 codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
1979 __ Bind(&false_target);
1980 }
1981 }
1982
VisitNativeDebugInfo(HNativeDebugInfo * info)1983 void LocationsBuilderX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
1984 new (GetGraph()->GetAllocator()) LocationSummary(info);
1985 }
1986
VisitNativeDebugInfo(HNativeDebugInfo *)1987 void InstructionCodeGeneratorX86_64::VisitNativeDebugInfo(HNativeDebugInfo*) {
1988 // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
1989 }
1990
GenerateNop()1991 void CodeGeneratorX86_64::GenerateNop() {
1992 __ nop();
1993 }
1994
HandleCondition(HCondition * cond)1995 void LocationsBuilderX86_64::HandleCondition(HCondition* cond) {
1996 LocationSummary* locations =
1997 new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall);
1998 // Handle the long/FP comparisons made in instruction simplification.
1999 switch (cond->InputAt(0)->GetType()) {
2000 case DataType::Type::kInt64:
2001 locations->SetInAt(0, Location::RequiresRegister());
2002 locations->SetInAt(1, Location::Any());
2003 break;
2004 case DataType::Type::kFloat32:
2005 case DataType::Type::kFloat64:
2006 locations->SetInAt(0, Location::RequiresFpuRegister());
2007 locations->SetInAt(1, Location::Any());
2008 break;
2009 default:
2010 locations->SetInAt(0, Location::RequiresRegister());
2011 locations->SetInAt(1, Location::Any());
2012 break;
2013 }
2014 if (!cond->IsEmittedAtUseSite()) {
2015 locations->SetOut(Location::RequiresRegister());
2016 }
2017 }
2018
HandleCondition(HCondition * cond)2019 void InstructionCodeGeneratorX86_64::HandleCondition(HCondition* cond) {
2020 if (cond->IsEmittedAtUseSite()) {
2021 return;
2022 }
2023
2024 LocationSummary* locations = cond->GetLocations();
2025 Location lhs = locations->InAt(0);
2026 Location rhs = locations->InAt(1);
2027 CpuRegister reg = locations->Out().AsRegister<CpuRegister>();
2028 NearLabel true_label, false_label;
2029
2030 switch (cond->InputAt(0)->GetType()) {
2031 default:
2032 // Integer case.
2033
2034 // Clear output register: setcc only sets the low byte.
2035 __ xorl(reg, reg);
2036
2037 codegen_->GenerateIntCompare(lhs, rhs);
2038 __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
2039 return;
2040 case DataType::Type::kInt64:
2041 // Clear output register: setcc only sets the low byte.
2042 __ xorl(reg, reg);
2043
2044 codegen_->GenerateLongCompare(lhs, rhs);
2045 __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
2046 return;
2047 case DataType::Type::kFloat32: {
2048 XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
2049 if (rhs.IsConstant()) {
2050 float value = rhs.GetConstant()->AsFloatConstant()->GetValue();
2051 __ ucomiss(lhs_reg, codegen_->LiteralFloatAddress(value));
2052 } else if (rhs.IsStackSlot()) {
2053 __ ucomiss(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
2054 } else {
2055 __ ucomiss(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
2056 }
2057 GenerateFPJumps(cond, &true_label, &false_label);
2058 break;
2059 }
2060 case DataType::Type::kFloat64: {
2061 XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
2062 if (rhs.IsConstant()) {
2063 double value = rhs.GetConstant()->AsDoubleConstant()->GetValue();
2064 __ ucomisd(lhs_reg, codegen_->LiteralDoubleAddress(value));
2065 } else if (rhs.IsDoubleStackSlot()) {
2066 __ ucomisd(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
2067 } else {
2068 __ ucomisd(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
2069 }
2070 GenerateFPJumps(cond, &true_label, &false_label);
2071 break;
2072 }
2073 }
2074
2075 // Convert the jumps into the result.
2076 NearLabel done_label;
2077
2078 // False case: result = 0.
2079 __ Bind(&false_label);
2080 __ xorl(reg, reg);
2081 __ jmp(&done_label);
2082
2083 // True case: result = 1.
2084 __ Bind(&true_label);
2085 __ movl(reg, Immediate(1));
2086 __ Bind(&done_label);
2087 }
2088
VisitEqual(HEqual * comp)2089 void LocationsBuilderX86_64::VisitEqual(HEqual* comp) {
2090 HandleCondition(comp);
2091 }
2092
VisitEqual(HEqual * comp)2093 void InstructionCodeGeneratorX86_64::VisitEqual(HEqual* comp) {
2094 HandleCondition(comp);
2095 }
2096
VisitNotEqual(HNotEqual * comp)2097 void LocationsBuilderX86_64::VisitNotEqual(HNotEqual* comp) {
2098 HandleCondition(comp);
2099 }
2100
VisitNotEqual(HNotEqual * comp)2101 void InstructionCodeGeneratorX86_64::VisitNotEqual(HNotEqual* comp) {
2102 HandleCondition(comp);
2103 }
2104
VisitLessThan(HLessThan * comp)2105 void LocationsBuilderX86_64::VisitLessThan(HLessThan* comp) {
2106 HandleCondition(comp);
2107 }
2108
VisitLessThan(HLessThan * comp)2109 void InstructionCodeGeneratorX86_64::VisitLessThan(HLessThan* comp) {
2110 HandleCondition(comp);
2111 }
2112
VisitLessThanOrEqual(HLessThanOrEqual * comp)2113 void LocationsBuilderX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2114 HandleCondition(comp);
2115 }
2116
VisitLessThanOrEqual(HLessThanOrEqual * comp)2117 void InstructionCodeGeneratorX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2118 HandleCondition(comp);
2119 }
2120
VisitGreaterThan(HGreaterThan * comp)2121 void LocationsBuilderX86_64::VisitGreaterThan(HGreaterThan* comp) {
2122 HandleCondition(comp);
2123 }
2124
VisitGreaterThan(HGreaterThan * comp)2125 void InstructionCodeGeneratorX86_64::VisitGreaterThan(HGreaterThan* comp) {
2126 HandleCondition(comp);
2127 }
2128
VisitGreaterThanOrEqual(HGreaterThanOrEqual * comp)2129 void LocationsBuilderX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2130 HandleCondition(comp);
2131 }
2132
VisitGreaterThanOrEqual(HGreaterThanOrEqual * comp)2133 void InstructionCodeGeneratorX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2134 HandleCondition(comp);
2135 }
2136
VisitBelow(HBelow * comp)2137 void LocationsBuilderX86_64::VisitBelow(HBelow* comp) {
2138 HandleCondition(comp);
2139 }
2140
VisitBelow(HBelow * comp)2141 void InstructionCodeGeneratorX86_64::VisitBelow(HBelow* comp) {
2142 HandleCondition(comp);
2143 }
2144
VisitBelowOrEqual(HBelowOrEqual * comp)2145 void LocationsBuilderX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
2146 HandleCondition(comp);
2147 }
2148
VisitBelowOrEqual(HBelowOrEqual * comp)2149 void InstructionCodeGeneratorX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
2150 HandleCondition(comp);
2151 }
2152
VisitAbove(HAbove * comp)2153 void LocationsBuilderX86_64::VisitAbove(HAbove* comp) {
2154 HandleCondition(comp);
2155 }
2156
VisitAbove(HAbove * comp)2157 void InstructionCodeGeneratorX86_64::VisitAbove(HAbove* comp) {
2158 HandleCondition(comp);
2159 }
2160
VisitAboveOrEqual(HAboveOrEqual * comp)2161 void LocationsBuilderX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
2162 HandleCondition(comp);
2163 }
2164
VisitAboveOrEqual(HAboveOrEqual * comp)2165 void InstructionCodeGeneratorX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
2166 HandleCondition(comp);
2167 }
2168
VisitCompare(HCompare * compare)2169 void LocationsBuilderX86_64::VisitCompare(HCompare* compare) {
2170 LocationSummary* locations =
2171 new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
2172 switch (compare->InputAt(0)->GetType()) {
2173 case DataType::Type::kBool:
2174 case DataType::Type::kUint8:
2175 case DataType::Type::kInt8:
2176 case DataType::Type::kUint16:
2177 case DataType::Type::kInt16:
2178 case DataType::Type::kInt32:
2179 case DataType::Type::kInt64: {
2180 locations->SetInAt(0, Location::RequiresRegister());
2181 locations->SetInAt(1, Location::Any());
2182 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2183 break;
2184 }
2185 case DataType::Type::kFloat32:
2186 case DataType::Type::kFloat64: {
2187 locations->SetInAt(0, Location::RequiresFpuRegister());
2188 locations->SetInAt(1, Location::Any());
2189 locations->SetOut(Location::RequiresRegister());
2190 break;
2191 }
2192 default:
2193 LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
2194 }
2195 }
2196
VisitCompare(HCompare * compare)2197 void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) {
2198 LocationSummary* locations = compare->GetLocations();
2199 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
2200 Location left = locations->InAt(0);
2201 Location right = locations->InAt(1);
2202
2203 NearLabel less, greater, done;
2204 DataType::Type type = compare->InputAt(0)->GetType();
2205 Condition less_cond = kLess;
2206
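  // The code below materializes the -1/0/1 result as:
  //   <compare>; movl out, 0; je done; j<less_cond> less;
  //   movl out, 1; jmp done; less: movl out, -1; done: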
2207 switch (type) {
2208 case DataType::Type::kBool:
2209 case DataType::Type::kUint8:
2210 case DataType::Type::kInt8:
2211 case DataType::Type::kUint16:
2212 case DataType::Type::kInt16:
2213 case DataType::Type::kInt32: {
2214 codegen_->GenerateIntCompare(left, right);
2215 break;
2216 }
2217 case DataType::Type::kInt64: {
2218 codegen_->GenerateLongCompare(left, right);
2219 break;
2220 }
2221 case DataType::Type::kFloat32: {
2222 XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
2223 if (right.IsConstant()) {
2224 float value = right.GetConstant()->AsFloatConstant()->GetValue();
2225 __ ucomiss(left_reg, codegen_->LiteralFloatAddress(value));
2226 } else if (right.IsStackSlot()) {
2227 __ ucomiss(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
2228 } else {
2229 __ ucomiss(left_reg, right.AsFpuRegister<XmmRegister>());
2230 }
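      // Dex cmpg-float returns 1 when either operand is NaN while cmpl-float returns -1;
      // IsGtBias() selects which label receives the unordered case (same below for double).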
2231 __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
2232 less_cond = kBelow; // ucomis{s,d} sets CF
2233 break;
2234 }
2235 case DataType::Type::kFloat64: {
2236 XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
2237 if (right.IsConstant()) {
2238 double value = right.GetConstant()->AsDoubleConstant()->GetValue();
2239 __ ucomisd(left_reg, codegen_->LiteralDoubleAddress(value));
2240 } else if (right.IsDoubleStackSlot()) {
2241 __ ucomisd(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
2242 } else {
2243 __ ucomisd(left_reg, right.AsFpuRegister<XmmRegister>());
2244 }
2245 __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
2246 less_cond = kBelow; // ucomis{s,d} sets CF
2247 break;
2248 }
2249 default:
2250 LOG(FATAL) << "Unexpected compare type " << type;
2251 }
2252
2253 __ movl(out, Immediate(0));
2254 __ j(kEqual, &done);
2255 __ j(less_cond, &less);
2256
2257 __ Bind(&greater);
2258 __ movl(out, Immediate(1));
2259 __ jmp(&done);
2260
2261 __ Bind(&less);
2262 __ movl(out, Immediate(-1));
2263
2264 __ Bind(&done);
2265 }
2266
VisitIntConstant(HIntConstant * constant)2267 void LocationsBuilderX86_64::VisitIntConstant(HIntConstant* constant) {
2268 LocationSummary* locations =
2269 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2270 locations->SetOut(Location::ConstantLocation(constant));
2271 }
2272
VisitIntConstant(HIntConstant * constant ATTRIBUTE_UNUSED)2273 void InstructionCodeGeneratorX86_64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
2274 // Will be generated at use site.
2275 }
2276
VisitNullConstant(HNullConstant * constant)2277 void LocationsBuilderX86_64::VisitNullConstant(HNullConstant* constant) {
2278 LocationSummary* locations =
2279 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2280 locations->SetOut(Location::ConstantLocation(constant));
2281 }
2282
VisitNullConstant(HNullConstant * constant ATTRIBUTE_UNUSED)2283 void InstructionCodeGeneratorX86_64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
2284 // Will be generated at use site.
2285 }
2286
VisitLongConstant(HLongConstant * constant)2287 void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) {
2288 LocationSummary* locations =
2289 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2290 locations->SetOut(Location::ConstantLocation(constant));
2291 }
2292
VisitLongConstant(HLongConstant * constant ATTRIBUTE_UNUSED)2293 void InstructionCodeGeneratorX86_64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
2294 // Will be generated at use site.
2295 }
2296
VisitFloatConstant(HFloatConstant * constant)2297 void LocationsBuilderX86_64::VisitFloatConstant(HFloatConstant* constant) {
2298 LocationSummary* locations =
2299 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2300 locations->SetOut(Location::ConstantLocation(constant));
2301 }
2302
VisitFloatConstant(HFloatConstant * constant ATTRIBUTE_UNUSED)2303 void InstructionCodeGeneratorX86_64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
2304 // Will be generated at use site.
2305 }
2306
VisitDoubleConstant(HDoubleConstant * constant)2307 void LocationsBuilderX86_64::VisitDoubleConstant(HDoubleConstant* constant) {
2308 LocationSummary* locations =
2309 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2310 locations->SetOut(Location::ConstantLocation(constant));
2311 }
2312
VisitDoubleConstant(HDoubleConstant * constant ATTRIBUTE_UNUSED)2313 void InstructionCodeGeneratorX86_64::VisitDoubleConstant(
2314 HDoubleConstant* constant ATTRIBUTE_UNUSED) {
2315 // Will be generated at use site.
2316 }
2317
VisitConstructorFence(HConstructorFence * constructor_fence)2318 void LocationsBuilderX86_64::VisitConstructorFence(HConstructorFence* constructor_fence) {
2319 constructor_fence->SetLocations(nullptr);
2320 }
2321
VisitConstructorFence(HConstructorFence * constructor_fence ATTRIBUTE_UNUSED)2322 void InstructionCodeGeneratorX86_64::VisitConstructorFence(
2323 HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
2324 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2325 }
2326
VisitMemoryBarrier(HMemoryBarrier * memory_barrier)2327 void LocationsBuilderX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2328 memory_barrier->SetLocations(nullptr);
2329 }
2330
VisitMemoryBarrier(HMemoryBarrier * memory_barrier)2331 void InstructionCodeGeneratorX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2332 codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
2333 }
2334
VisitReturnVoid(HReturnVoid * ret)2335 void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) {
2336 ret->SetLocations(nullptr);
2337 }
2338
VisitReturnVoid(HReturnVoid * ret ATTRIBUTE_UNUSED)2339 void InstructionCodeGeneratorX86_64::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) {
2340 codegen_->GenerateFrameExit();
2341 }
2342
VisitReturn(HReturn * ret)2343 void LocationsBuilderX86_64::VisitReturn(HReturn* ret) {
2344 LocationSummary* locations =
2345 new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall);
2346 switch (ret->InputAt(0)->GetType()) {
2347 case DataType::Type::kReference:
2348 case DataType::Type::kBool:
2349 case DataType::Type::kUint8:
2350 case DataType::Type::kInt8:
2351 case DataType::Type::kUint16:
2352 case DataType::Type::kInt16:
2353 case DataType::Type::kInt32:
2354 case DataType::Type::kInt64:
2355 locations->SetInAt(0, Location::RegisterLocation(RAX));
2356 break;
2357
2358 case DataType::Type::kFloat32:
2359 case DataType::Type::kFloat64:
2360 locations->SetInAt(0, Location::FpuRegisterLocation(XMM0));
2361 break;
2362
2363 default:
2364 LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
2365 }
2366 }
2367
VisitReturn(HReturn * ret)2368 void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) {
2369 switch (ret->InputAt(0)->GetType()) {
2370 case DataType::Type::kReference:
2371 case DataType::Type::kBool:
2372 case DataType::Type::kUint8:
2373 case DataType::Type::kInt8:
2374 case DataType::Type::kUint16:
2375 case DataType::Type::kInt16:
2376 case DataType::Type::kInt32:
2377 case DataType::Type::kInt64:
2378 DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<CpuRegister>().AsRegister(), RAX);
2379 break;
2380
2381 case DataType::Type::kFloat32: {
2382 DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(),
2383 XMM0);
2384 // To simplify callers of an OSR method, we put the return value in both
2385 // floating point and core register.
2386 if (GetGraph()->IsCompilingOsr()) {
2387 __ movd(CpuRegister(RAX), XmmRegister(XMM0), /* is64bit= */ false);
2388 }
2389 break;
2390 }
2391 case DataType::Type::kFloat64: {
2392 DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(),
2393 XMM0);
2394 // To simplify callers of an OSR method, we put the return value in both
2395 // floating point and core register.
2396 if (GetGraph()->IsCompilingOsr()) {
2397 __ movd(CpuRegister(RAX), XmmRegister(XMM0), /* is64bit= */ true);
2398 }
2399 break;
2400 }
2401
2402 default:
2403 LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
2404 }
2405 codegen_->GenerateFrameExit();
2406 }
2407
GetReturnLocation(DataType::Type type) const2408 Location InvokeDexCallingConventionVisitorX86_64::GetReturnLocation(DataType::Type type) const {
2409 switch (type) {
2410 case DataType::Type::kReference:
2411 case DataType::Type::kBool:
2412 case DataType::Type::kUint8:
2413 case DataType::Type::kInt8:
2414 case DataType::Type::kUint16:
2415 case DataType::Type::kInt16:
2416 case DataType::Type::kUint32:
2417 case DataType::Type::kInt32:
2418 case DataType::Type::kUint64:
2419 case DataType::Type::kInt64:
2420 return Location::RegisterLocation(RAX);
2421
2422 case DataType::Type::kVoid:
2423 return Location::NoLocation();
2424
2425 case DataType::Type::kFloat64:
2426 case DataType::Type::kFloat32:
2427 return Location::FpuRegisterLocation(XMM0);
2428 }
2429
2430 UNREACHABLE();
2431 }
2432
GetMethodLocation() const2433 Location InvokeDexCallingConventionVisitorX86_64::GetMethodLocation() const {
2434 return Location::RegisterLocation(kMethodRegisterArgument);
2435 }
2436
GetNextLocation(DataType::Type type)2437 Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(DataType::Type type) {
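  // Note: 64-bit arguments advance the stack index by two slots. When registers run out,
  // the slot index saved before the bump (stack_index_ - 1 or stack_index_ - 2) gives the
  // caller-stack location of the argument.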
2438 switch (type) {
2439 case DataType::Type::kReference:
2440 case DataType::Type::kBool:
2441 case DataType::Type::kUint8:
2442 case DataType::Type::kInt8:
2443 case DataType::Type::kUint16:
2444 case DataType::Type::kInt16:
2445 case DataType::Type::kInt32: {
2446 uint32_t index = gp_index_++;
2447 stack_index_++;
2448 if (index < calling_convention.GetNumberOfRegisters()) {
2449 return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
2450 } else {
2451 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
2452 }
2453 }
2454
2455 case DataType::Type::kInt64: {
2456 uint32_t index = gp_index_;
2457 stack_index_ += 2;
2458 if (index < calling_convention.GetNumberOfRegisters()) {
2459 gp_index_ += 1;
2460 return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
2461 } else {
2462 gp_index_ += 2;
2463 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
2464 }
2465 }
2466
2467 case DataType::Type::kFloat32: {
2468 uint32_t index = float_index_++;
2469 stack_index_++;
2470 if (index < calling_convention.GetNumberOfFpuRegisters()) {
2471 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
2472 } else {
2473 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
2474 }
2475 }
2476
2477 case DataType::Type::kFloat64: {
2478 uint32_t index = float_index_++;
2479 stack_index_ += 2;
2480 if (index < calling_convention.GetNumberOfFpuRegisters()) {
2481 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
2482 } else {
2483 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
2484 }
2485 }
2486
2487 case DataType::Type::kUint32:
2488 case DataType::Type::kUint64:
2489 case DataType::Type::kVoid:
2490 LOG(FATAL) << "Unexpected parameter type " << type;
2491 UNREACHABLE();
2492 }
2493 return Location::NoLocation();
2494 }
2495
VisitInvokeUnresolved(HInvokeUnresolved * invoke)2496 void LocationsBuilderX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2497 // The trampoline uses the same calling convention as the dex calling convention,
2498 // except that instead of loading arg0/r0 with the target Method*, arg0/r0 contains
2499 // the method_idx.
2500 HandleInvoke(invoke);
2501 }
2502
VisitInvokeUnresolved(HInvokeUnresolved * invoke)2503 void InstructionCodeGeneratorX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2504 codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
2505 }
2506
VisitInvokeStaticOrDirect(HInvokeStaticOrDirect * invoke)2507 void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2508 // Explicit clinit checks triggered by static invokes must have been pruned by
2509 // art::PrepareForRegisterAllocation.
2510 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2511
2512 IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
2513 if (intrinsic.TryDispatch(invoke)) {
2514 return;
2515 }
2516
2517 HandleInvoke(invoke);
2518 }
2519
TryGenerateIntrinsicCode(HInvoke * invoke,CodeGeneratorX86_64 * codegen)2520 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
2521 if (invoke->GetLocations()->Intrinsified()) {
2522 IntrinsicCodeGeneratorX86_64 intrinsic(codegen);
2523 intrinsic.Dispatch(invoke);
2524 return true;
2525 }
2526 return false;
2527 }
2528
VisitInvokeStaticOrDirect(HInvokeStaticOrDirect * invoke)2529 void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2530 // Explicit clinit checks triggered by static invokes must have been pruned by
2531 // art::PrepareForRegisterAllocation.
2532 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2533
2534 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2535 return;
2536 }
2537
2538 LocationSummary* locations = invoke->GetLocations();
2539 codegen_->GenerateStaticOrDirectCall(
2540 invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
2541 }
2542
HandleInvoke(HInvoke * invoke)2543 void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) {
2544 InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
2545 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
2546 }
2547
VisitInvokeVirtual(HInvokeVirtual * invoke)2548 void LocationsBuilderX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2549 IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
2550 if (intrinsic.TryDispatch(invoke)) {
2551 return;
2552 }
2553
2554 HandleInvoke(invoke);
2555 }
2556
VisitInvokeVirtual(HInvokeVirtual * invoke)2557 void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2558 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2559 return;
2560 }
2561
2562 codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
2563 DCHECK(!codegen_->IsLeafMethod());
2564 }
2565
VisitInvokeInterface(HInvokeInterface * invoke)2566 void LocationsBuilderX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
2567 HandleInvoke(invoke);
2568 // Add the hidden argument.
2569 invoke->GetLocations()->AddTemp(Location::RegisterLocation(RAX));
2570 }
2571
MaybeGenerateInlineCacheCheck(HInstruction * instruction,CpuRegister klass)2572 void CodeGeneratorX86_64::MaybeGenerateInlineCacheCheck(HInstruction* instruction,
2573 CpuRegister klass) {
2574 DCHECK_EQ(RDI, klass.AsRegister());
2575 // We know the destination of an intrinsic, so no need to record inline
2576 // caches.
2577 if (!instruction->GetLocations()->Intrinsified() &&
2578 GetGraph()->IsCompilingBaseline() &&
2579 !Runtime::Current()->IsAotCompiler()) {
2580 ScopedObjectAccess soa(Thread::Current());
2581 ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize);
2582 if (info != nullptr) {
2583 InlineCache* cache = info->GetInlineCache(instruction->GetDexPc());
2584 uint64_t address = reinterpret_cast64<uint64_t>(cache);
2585 NearLabel done;
2586 __ movq(CpuRegister(TMP), Immediate(address));
2587 // Fast path for a monomorphic cache.
2588 __ cmpl(Address(CpuRegister(TMP), InlineCache::ClassesOffset().Int32Value()), klass);
2589 __ j(kEqual, &done);
2590 GenerateInvokeRuntime(
2591 GetThreadOffset<kX86_64PointerSize>(kQuickUpdateInlineCache).Int32Value());
2592 __ Bind(&done);
2593 }
2594 }
2595 }
2596
VisitInvokeInterface(HInvokeInterface * invoke)2597 void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
2598 // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
2599 LocationSummary* locations = invoke->GetLocations();
2600 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
2601 CpuRegister hidden_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
2602 Location receiver = locations->InAt(0);
2603 size_t class_offset = mirror::Object::ClassOffset().SizeValue();
2604
2605 if (receiver.IsStackSlot()) {
2606 __ movl(temp, Address(CpuRegister(RSP), receiver.GetStackIndex()));
2607 // /* HeapReference<Class> */ temp = temp->klass_
2608 __ movl(temp, Address(temp, class_offset));
2609 } else {
2610 // /* HeapReference<Class> */ temp = receiver->klass_
2611 __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset));
2612 }
2613 codegen_->MaybeRecordImplicitNullCheck(invoke);
2614 // Instead of simply (possibly) unpoisoning `temp` here, we should
2615 // emit a read barrier for the previous class reference load.
2616 // However this is not required in practice, as this is an
2617 // intermediate/temporary reference and because the current
2618 // concurrent copying collector keeps the from-space memory
2619 // intact/accessible until the end of the marking phase (the
2620 // concurrent copying collector may not in the future).
2621 __ MaybeUnpoisonHeapReference(temp);
2622
2623 codegen_->MaybeGenerateInlineCacheCheck(invoke, temp);
2624
2625 // Set the hidden argument. It is safe to do this here, as RAX
2626 // won't be modified thereafter, before the `call` instruction.
2627 // We also do it after MaybeGenerateInlineCacheCheck, which may use RAX.
2628 DCHECK_EQ(RAX, hidden_reg.AsRegister());
2629 codegen_->Load64BitValue(hidden_reg, invoke->GetDexMethodIndex());
2630
2631 // temp = temp->GetAddressOfIMT()
2632 __ movq(temp,
2633 Address(temp, mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
2635 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
2636 invoke->GetImtIndex(), kX86_64PointerSize));
2637 // temp = temp->GetImtEntryAt(method_offset);
2638 __ movq(temp, Address(temp, method_offset));
2639 // call temp->GetEntryPoint();
2640 __ call(Address(
2641 temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64PointerSize).SizeValue()));
2642
2643 DCHECK(!codegen_->IsLeafMethod());
2644 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2645 }
2646
2647 void LocationsBuilderX86_64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2648 HandleInvoke(invoke);
2649 }
2650
2651 void InstructionCodeGeneratorX86_64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2652 codegen_->GenerateInvokePolymorphicCall(invoke);
2653 }
2654
2655 void LocationsBuilderX86_64::VisitInvokeCustom(HInvokeCustom* invoke) {
2656 HandleInvoke(invoke);
2657 }
2658
2659 void InstructionCodeGeneratorX86_64::VisitInvokeCustom(HInvokeCustom* invoke) {
2660 codegen_->GenerateInvokeCustomCall(invoke);
2661 }
2662
2663 void LocationsBuilderX86_64::VisitNeg(HNeg* neg) {
2664 LocationSummary* locations =
2665 new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
2666 switch (neg->GetResultType()) {
2667 case DataType::Type::kInt32:
2668 case DataType::Type::kInt64:
2669 locations->SetInAt(0, Location::RequiresRegister());
2670 locations->SetOut(Location::SameAsFirstInput());
2671 break;
2672
2673 case DataType::Type::kFloat32:
2674 case DataType::Type::kFloat64:
2675 locations->SetInAt(0, Location::RequiresFpuRegister());
2676 locations->SetOut(Location::SameAsFirstInput());
2677 locations->AddTemp(Location::RequiresFpuRegister());
2678 break;
2679
2680 default:
2681 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2682 }
2683 }
2684
2685 void InstructionCodeGeneratorX86_64::VisitNeg(HNeg* neg) {
2686 LocationSummary* locations = neg->GetLocations();
2687 Location out = locations->Out();
2688 Location in = locations->InAt(0);
2689 switch (neg->GetResultType()) {
2690 case DataType::Type::kInt32:
2691 DCHECK(in.IsRegister());
2692 DCHECK(in.Equals(out));
2693 __ negl(out.AsRegister<CpuRegister>());
2694 break;
2695
2696 case DataType::Type::kInt64:
2697 DCHECK(in.IsRegister());
2698 DCHECK(in.Equals(out));
2699 __ negq(out.AsRegister<CpuRegister>());
2700 break;
2701
2702 case DataType::Type::kFloat32: {
2703 DCHECK(in.Equals(out));
2704 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2705 // Implement float negation with an exclusive or with value
2706 // 0x80000000 (mask for bit 31, representing the sign of a
2707 // single-precision floating-point number).
2708 __ movss(mask, codegen_->LiteralInt32Address(0x80000000));
2709 __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
2710 break;
2711 }
2712
2713 case DataType::Type::kFloat64: {
2714 DCHECK(in.Equals(out));
2715 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2716 // Implement double negation with an exclusive or with value
2717 // 0x8000000000000000 (mask for bit 63, representing the sign of
2718 // a double-precision floating-point number).
2719 __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000)));
2720 __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
2721 break;
2722 }
2723
2724 default:
2725 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2726 }
2727 }
2728
2729 void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) {
2730 LocationSummary* locations =
2731 new (GetGraph()->GetAllocator()) LocationSummary(conversion, LocationSummary::kNoCall);
2732 DataType::Type result_type = conversion->GetResultType();
2733 DataType::Type input_type = conversion->GetInputType();
2734 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
2735 << input_type << " -> " << result_type;
2736
2737 switch (result_type) {
2738 case DataType::Type::kUint8:
2739 case DataType::Type::kInt8:
2740 case DataType::Type::kUint16:
2741 case DataType::Type::kInt16:
2742 DCHECK(DataType::IsIntegralType(input_type)) << input_type;
2743 locations->SetInAt(0, Location::Any());
2744 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2745 break;
2746
2747 case DataType::Type::kInt32:
2748 switch (input_type) {
2749 case DataType::Type::kInt64:
2750 locations->SetInAt(0, Location::Any());
2751 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2752 break;
2753
2754 case DataType::Type::kFloat32:
2755 locations->SetInAt(0, Location::RequiresFpuRegister());
2756 locations->SetOut(Location::RequiresRegister());
2757 break;
2758
2759 case DataType::Type::kFloat64:
2760 locations->SetInAt(0, Location::RequiresFpuRegister());
2761 locations->SetOut(Location::RequiresRegister());
2762 break;
2763
2764 default:
2765 LOG(FATAL) << "Unexpected type conversion from " << input_type
2766 << " to " << result_type;
2767 }
2768 break;
2769
2770 case DataType::Type::kInt64:
2771 switch (input_type) {
2772 case DataType::Type::kBool:
2773 case DataType::Type::kUint8:
2774 case DataType::Type::kInt8:
2775 case DataType::Type::kUint16:
2776 case DataType::Type::kInt16:
2777 case DataType::Type::kInt32:
2778 // TODO: We would benefit from a (to-be-implemented)
2779 // Location::RegisterOrStackSlot requirement for this input.
2780 locations->SetInAt(0, Location::RequiresRegister());
2781 locations->SetOut(Location::RequiresRegister());
2782 break;
2783
2784 case DataType::Type::kFloat32:
2785 locations->SetInAt(0, Location::RequiresFpuRegister());
2786 locations->SetOut(Location::RequiresRegister());
2787 break;
2788
2789 case DataType::Type::kFloat64:
2790 locations->SetInAt(0, Location::RequiresFpuRegister());
2791 locations->SetOut(Location::RequiresRegister());
2792 break;
2793
2794 default:
2795 LOG(FATAL) << "Unexpected type conversion from " << input_type
2796 << " to " << result_type;
2797 }
2798 break;
2799
2800 case DataType::Type::kFloat32:
2801 switch (input_type) {
2802 case DataType::Type::kBool:
2803 case DataType::Type::kUint8:
2804 case DataType::Type::kInt8:
2805 case DataType::Type::kUint16:
2806 case DataType::Type::kInt16:
2807 case DataType::Type::kInt32:
2808 locations->SetInAt(0, Location::Any());
2809 locations->SetOut(Location::RequiresFpuRegister());
2810 break;
2811
2812 case DataType::Type::kInt64:
2813 locations->SetInAt(0, Location::Any());
2814 locations->SetOut(Location::RequiresFpuRegister());
2815 break;
2816
2817 case DataType::Type::kFloat64:
2818 locations->SetInAt(0, Location::Any());
2819 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2820 break;
2821
2822 default:
2823 LOG(FATAL) << "Unexpected type conversion from " << input_type
2824 << " to " << result_type;
2825 }
2826 break;
2827
2828 case DataType::Type::kFloat64:
2829 switch (input_type) {
2830 case DataType::Type::kBool:
2831 case DataType::Type::kUint8:
2832 case DataType::Type::kInt8:
2833 case DataType::Type::kUint16:
2834 case DataType::Type::kInt16:
2835 case DataType::Type::kInt32:
2836 locations->SetInAt(0, Location::Any());
2837 locations->SetOut(Location::RequiresFpuRegister());
2838 break;
2839
2840 case DataType::Type::kInt64:
2841 locations->SetInAt(0, Location::Any());
2842 locations->SetOut(Location::RequiresFpuRegister());
2843 break;
2844
2845 case DataType::Type::kFloat32:
2846 locations->SetInAt(0, Location::Any());
2847 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2848 break;
2849
2850 default:
2851 LOG(FATAL) << "Unexpected type conversion from " << input_type
2852 << " to " << result_type;
2853 }
2854 break;
2855
2856 default:
2857 LOG(FATAL) << "Unexpected type conversion from " << input_type
2858 << " to " << result_type;
2859 }
2860 }
2861
2862 void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conversion) {
2863 LocationSummary* locations = conversion->GetLocations();
2864 Location out = locations->Out();
2865 Location in = locations->InAt(0);
2866 DataType::Type result_type = conversion->GetResultType();
2867 DataType::Type input_type = conversion->GetInputType();
2868 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
2869 << input_type << " -> " << result_type;
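  // The narrowing integral conversions below (to Uint8/Int8/Uint16/Int16) are implemented by
  // zero- or sign-extending the low bits of the input; e.g. the low byte 0xFF of an input
  // value 0x1FF reads as 255 via movzxb and as -1 via movsxb.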
2870 switch (result_type) {
2871 case DataType::Type::kUint8:
2872 switch (input_type) {
2873 case DataType::Type::kInt8:
2874 case DataType::Type::kUint16:
2875 case DataType::Type::kInt16:
2876 case DataType::Type::kInt32:
2877 case DataType::Type::kInt64:
2878 if (in.IsRegister()) {
2879 __ movzxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2880 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
2881 __ movzxb(out.AsRegister<CpuRegister>(),
2882 Address(CpuRegister(RSP), in.GetStackIndex()));
2883 } else {
2884 __ movl(out.AsRegister<CpuRegister>(),
2885 Immediate(static_cast<uint8_t>(Int64FromConstant(in.GetConstant()))));
2886 }
2887 break;
2888
2889 default:
2890 LOG(FATAL) << "Unexpected type conversion from " << input_type
2891 << " to " << result_type;
2892 }
2893 break;
2894
2895 case DataType::Type::kInt8:
2896 switch (input_type) {
2897 case DataType::Type::kUint8:
2898 case DataType::Type::kUint16:
2899 case DataType::Type::kInt16:
2900 case DataType::Type::kInt32:
2901 case DataType::Type::kInt64:
2902 if (in.IsRegister()) {
2903 __ movsxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2904 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
2905 __ movsxb(out.AsRegister<CpuRegister>(),
2906 Address(CpuRegister(RSP), in.GetStackIndex()));
2907 } else {
2908 __ movl(out.AsRegister<CpuRegister>(),
2909 Immediate(static_cast<int8_t>(Int64FromConstant(in.GetConstant()))));
2910 }
2911 break;
2912
2913 default:
2914 LOG(FATAL) << "Unexpected type conversion from " << input_type
2915 << " to " << result_type;
2916 }
2917 break;
2918
2919 case DataType::Type::kUint16:
2920 switch (input_type) {
2921 case DataType::Type::kInt8:
2922 case DataType::Type::kInt16:
2923 case DataType::Type::kInt32:
2924 case DataType::Type::kInt64:
2925 if (in.IsRegister()) {
2926 __ movzxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2927 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
2928 __ movzxw(out.AsRegister<CpuRegister>(),
2929 Address(CpuRegister(RSP), in.GetStackIndex()));
2930 } else {
2931 __ movl(out.AsRegister<CpuRegister>(),
2932 Immediate(static_cast<uint16_t>(Int64FromConstant(in.GetConstant()))));
2933 }
2934 break;
2935
2936 default:
2937 LOG(FATAL) << "Unexpected type conversion from " << input_type
2938 << " to " << result_type;
2939 }
2940 break;
2941
2942 case DataType::Type::kInt16:
2943 switch (input_type) {
2944 case DataType::Type::kUint16:
2945 case DataType::Type::kInt32:
2946 case DataType::Type::kInt64:
2947 if (in.IsRegister()) {
2948 __ movsxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2949 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
2950 __ movsxw(out.AsRegister<CpuRegister>(),
2951 Address(CpuRegister(RSP), in.GetStackIndex()));
2952 } else {
2953 __ movl(out.AsRegister<CpuRegister>(),
2954 Immediate(static_cast<int16_t>(Int64FromConstant(in.GetConstant()))));
2955 }
2956 break;
2957
2958 default:
2959 LOG(FATAL) << "Unexpected type conversion from " << input_type
2960 << " to " << result_type;
2961 }
2962 break;
2963
2964 case DataType::Type::kInt32:
2965 switch (input_type) {
2966 case DataType::Type::kInt64:
2967 if (in.IsRegister()) {
2968 __ movl(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2969 } else if (in.IsDoubleStackSlot()) {
2970 __ movl(out.AsRegister<CpuRegister>(),
2971 Address(CpuRegister(RSP), in.GetStackIndex()));
2972 } else {
2973 DCHECK(in.IsConstant());
2974 DCHECK(in.GetConstant()->IsLongConstant());
2975 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
2976 __ movl(out.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
2977 }
2978 break;
2979
2980 case DataType::Type::kFloat32: {
2981 XmmRegister input = in.AsFpuRegister<XmmRegister>();
2982 CpuRegister output = out.AsRegister<CpuRegister>();
2983 NearLabel done, nan;
2984
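          // Note (illustrative): this sequence implements Java's saturating f2i semantics.
          // NaN maps to 0 (the `nan` branch), inputs >= (float)INT_MAX map to INT_MAX (the
          // comiss/j(kAboveEqual) below), and for inputs below INT_MIN cvttss2si produces the
          // x86 "integer indefinite" value 0x80000000, which already equals INT_MIN. The
          // 64-bit conversions later in this switch follow the same pattern with kPrimLongMax.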
2985 __ movl(output, Immediate(kPrimIntMax));
2986 // if input >= (float)INT_MAX goto done
2987 __ comiss(input, codegen_->LiteralFloatAddress(static_cast<float>(kPrimIntMax)));
2988 __ j(kAboveEqual, &done);
2989 // if input == NaN goto nan
2990 __ j(kUnordered, &nan);
2991 // output = float-to-int-truncate(input)
2992 __ cvttss2si(output, input, false);
2993 __ jmp(&done);
2994 __ Bind(&nan);
2995 // output = 0
2996 __ xorl(output, output);
2997 __ Bind(&done);
2998 break;
2999 }
3000
3001 case DataType::Type::kFloat64: {
3002 XmmRegister input = in.AsFpuRegister<XmmRegister>();
3003 CpuRegister output = out.AsRegister<CpuRegister>();
3004 NearLabel done, nan;
3005
3006 __ movl(output, Immediate(kPrimIntMax));
3007 // if input >= (double)INT_MAX goto done
3008 __ comisd(input, codegen_->LiteralDoubleAddress(kPrimIntMax));
3009 __ j(kAboveEqual, &done);
3010 // if input == NaN goto nan
3011 __ j(kUnordered, &nan);
3012 // output = double-to-int-truncate(input)
3013 __ cvttsd2si(output, input);
3014 __ jmp(&done);
3015 __ Bind(&nan);
3016 // output = 0
3017 __ xorl(output, output);
3018 __ Bind(&done);
3019 break;
3020 }
3021
3022 default:
3023 LOG(FATAL) << "Unexpected type conversion from " << input_type
3024 << " to " << result_type;
3025 }
3026 break;
3027
3028 case DataType::Type::kInt64:
3029       DCHECK(out.IsRegister());
3030       switch (input_type) {
3031         case DataType::Type::kBool:
3032 case DataType::Type::kUint8:
3033 case DataType::Type::kInt8:
3034 case DataType::Type::kUint16:
3035 case DataType::Type::kInt16:
3036 case DataType::Type::kInt32:
3037 DCHECK(in.IsRegister());
3038 __ movsxd(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3039 break;
3040
3041 case DataType::Type::kFloat32: {
3042 XmmRegister input = in.AsFpuRegister<XmmRegister>();
3043 CpuRegister output = out.AsRegister<CpuRegister>();
3044 NearLabel done, nan;
3045
3046 codegen_->Load64BitValue(output, kPrimLongMax);
3047 // if input >= (float)LONG_MAX goto done
3048 __ comiss(input, codegen_->LiteralFloatAddress(static_cast<float>(kPrimLongMax)));
3049 __ j(kAboveEqual, &done);
3050 // if input == NaN goto nan
3051 __ j(kUnordered, &nan);
3052 // output = float-to-long-truncate(input)
3053 __ cvttss2si(output, input, true);
3054 __ jmp(&done);
3055 __ Bind(&nan);
3056 // output = 0
3057 __ xorl(output, output);
3058 __ Bind(&done);
3059 break;
3060 }
3061
3062 case DataType::Type::kFloat64: {
3063 XmmRegister input = in.AsFpuRegister<XmmRegister>();
3064 CpuRegister output = out.AsRegister<CpuRegister>();
3065 NearLabel done, nan;
3066
3067 codegen_->Load64BitValue(output, kPrimLongMax);
3068 // if input >= (double)LONG_MAX goto done
3069 __ comisd(input, codegen_->LiteralDoubleAddress(
3070 static_cast<double>(kPrimLongMax)));
3071 __ j(kAboveEqual, &done);
3072 // if input == NaN goto nan
3073 __ j(kUnordered, &nan);
3074 // output = double-to-long-truncate(input)
3075 __ cvttsd2si(output, input, true);
3076 __ jmp(&done);
3077 __ Bind(&nan);
3078 // output = 0
3079 __ xorl(output, output);
3080 __ Bind(&done);
3081 break;
3082 }
3083
3084 default:
3085 LOG(FATAL) << "Unexpected type conversion from " << input_type
3086 << " to " << result_type;
3087 }
3088 break;
3089
3090 case DataType::Type::kFloat32:
3091 switch (input_type) {
3092 case DataType::Type::kBool:
3093 case DataType::Type::kUint8:
3094 case DataType::Type::kInt8:
3095 case DataType::Type::kUint16:
3096 case DataType::Type::kInt16:
3097 case DataType::Type::kInt32:
3098 if (in.IsRegister()) {
3099 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
3100 } else if (in.IsConstant()) {
3101 int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
3102 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3103 codegen_->Load32BitValue(dest, static_cast<float>(v));
3104 } else {
3105 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
3106 Address(CpuRegister(RSP), in.GetStackIndex()), false);
3107 }
3108 break;
3109
3110 case DataType::Type::kInt64:
3111 if (in.IsRegister()) {
3112 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
3113 } else if (in.IsConstant()) {
3114 int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
3115 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3116 codegen_->Load32BitValue(dest, static_cast<float>(v));
3117 } else {
3118 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
3119 Address(CpuRegister(RSP), in.GetStackIndex()), true);
3120 }
3121 break;
3122
3123 case DataType::Type::kFloat64:
3124 if (in.IsFpuRegister()) {
3125 __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3126 } else if (in.IsConstant()) {
3127 double v = in.GetConstant()->AsDoubleConstant()->GetValue();
3128 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3129 codegen_->Load32BitValue(dest, static_cast<float>(v));
3130 } else {
3131 __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(),
3132 Address(CpuRegister(RSP), in.GetStackIndex()));
3133 }
3134 break;
3135
3136 default:
3137 LOG(FATAL) << "Unexpected type conversion from " << input_type
3138 << " to " << result_type;
3139 }
3140 break;
3141
3142 case DataType::Type::kFloat64:
3143 switch (input_type) {
3144 case DataType::Type::kBool:
3145 case DataType::Type::kUint8:
3146 case DataType::Type::kInt8:
3147 case DataType::Type::kUint16:
3148 case DataType::Type::kInt16:
3149 case DataType::Type::kInt32:
3150 if (in.IsRegister()) {
3151 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
3152 } else if (in.IsConstant()) {
3153 int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
3154 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3155 codegen_->Load64BitValue(dest, static_cast<double>(v));
3156 } else {
3157 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
3158 Address(CpuRegister(RSP), in.GetStackIndex()), false);
3159 }
3160 break;
3161
3162 case DataType::Type::kInt64:
3163 if (in.IsRegister()) {
3164 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
3165 } else if (in.IsConstant()) {
3166 int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
3167 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3168 codegen_->Load64BitValue(dest, static_cast<double>(v));
3169 } else {
3170 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
3171 Address(CpuRegister(RSP), in.GetStackIndex()), true);
3172 }
3173 break;
3174
3175 case DataType::Type::kFloat32:
3176 if (in.IsFpuRegister()) {
3177 __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3178 } else if (in.IsConstant()) {
3179 float v = in.GetConstant()->AsFloatConstant()->GetValue();
3180 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3181 codegen_->Load64BitValue(dest, static_cast<double>(v));
3182 } else {
3183 __ cvtss2sd(out.AsFpuRegister<XmmRegister>(),
3184 Address(CpuRegister(RSP), in.GetStackIndex()));
3185 }
3186 break;
3187
3188 default:
3189 LOG(FATAL) << "Unexpected type conversion from " << input_type
3190 << " to " << result_type;
3191 }
3192 break;
3193
3194 default:
3195 LOG(FATAL) << "Unexpected type conversion from " << input_type
3196 << " to " << result_type;
3197 }
3198 }
3199
3200 void LocationsBuilderX86_64::VisitAdd(HAdd* add) {
3201 LocationSummary* locations =
3202 new (GetGraph()->GetAllocator()) LocationSummary(add, LocationSummary::kNoCall);
3203 switch (add->GetResultType()) {
3204 case DataType::Type::kInt32: {
3205 locations->SetInAt(0, Location::RequiresRegister());
3206 locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
3207 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3208 break;
3209 }
3210
3211 case DataType::Type::kInt64: {
3212 locations->SetInAt(0, Location::RequiresRegister());
3213       // We can use a leaq or addq if the constant fits in an immediate.
3214 locations->SetInAt(1, Location::RegisterOrInt32Constant(add->InputAt(1)));
3215 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3216 break;
3217 }
3218
3219 case DataType::Type::kFloat64:
3220 case DataType::Type::kFloat32: {
3221 locations->SetInAt(0, Location::RequiresFpuRegister());
3222 locations->SetInAt(1, Location::Any());
3223 locations->SetOut(Location::SameAsFirstInput());
3224 break;
3225 }
3226
3227 default:
3228 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3229 }
3230 }
3231
3232 void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) {
3233 LocationSummary* locations = add->GetLocations();
3234 Location first = locations->InAt(0);
3235 Location second = locations->InAt(1);
3236 Location out = locations->Out();
3237
3238 switch (add->GetResultType()) {
3239 case DataType::Type::kInt32: {
3240 if (second.IsRegister()) {
3241 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3242 __ addl(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3243 } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3244 __ addl(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
3245 } else {
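          // leal computes first + second into `out` without clobbering either input (and
          // without touching the flags), acting as a three-operand add.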
3246 __ leal(out.AsRegister<CpuRegister>(), Address(
3247 first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
3248 }
3249 } else if (second.IsConstant()) {
3250 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3251 __ addl(out.AsRegister<CpuRegister>(),
3252 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
3253 } else {
3254 __ leal(out.AsRegister<CpuRegister>(), Address(
3255 first.AsRegister<CpuRegister>(), second.GetConstant()->AsIntConstant()->GetValue()));
3256 }
3257 } else {
3258 DCHECK(first.Equals(locations->Out()));
3259 __ addl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
3260 }
3261 break;
3262 }
3263
3264 case DataType::Type::kInt64: {
3265 if (second.IsRegister()) {
3266 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3267 __ addq(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3268 } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3269 __ addq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
3270 } else {
3271 __ leaq(out.AsRegister<CpuRegister>(), Address(
3272 first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
3273 }
3274 } else {
3275 DCHECK(second.IsConstant());
3276 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3277 int32_t int32_value = Low32Bits(value);
3278 DCHECK_EQ(int32_value, value);
3279 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3280 __ addq(out.AsRegister<CpuRegister>(), Immediate(int32_value));
3281 } else {
3282 __ leaq(out.AsRegister<CpuRegister>(), Address(
3283 first.AsRegister<CpuRegister>(), int32_value));
3284 }
3285 }
3286 break;
3287 }
3288
3289 case DataType::Type::kFloat32: {
3290 if (second.IsFpuRegister()) {
3291 __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3292 } else if (second.IsConstant()) {
3293 __ addss(first.AsFpuRegister<XmmRegister>(),
3294 codegen_->LiteralFloatAddress(
3295 second.GetConstant()->AsFloatConstant()->GetValue()));
3296 } else {
3297 DCHECK(second.IsStackSlot());
3298 __ addss(first.AsFpuRegister<XmmRegister>(),
3299 Address(CpuRegister(RSP), second.GetStackIndex()));
3300 }
3301 break;
3302 }
3303
3304 case DataType::Type::kFloat64: {
3305 if (second.IsFpuRegister()) {
3306 __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3307 } else if (second.IsConstant()) {
3308 __ addsd(first.AsFpuRegister<XmmRegister>(),
3309 codegen_->LiteralDoubleAddress(
3310 second.GetConstant()->AsDoubleConstant()->GetValue()));
3311 } else {
3312 DCHECK(second.IsDoubleStackSlot());
3313 __ addsd(first.AsFpuRegister<XmmRegister>(),
3314 Address(CpuRegister(RSP), second.GetStackIndex()));
3315 }
3316 break;
3317 }
3318
3319 default:
3320 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3321 }
3322 }
3323
3324 void LocationsBuilderX86_64::VisitSub(HSub* sub) {
3325 LocationSummary* locations =
3326 new (GetGraph()->GetAllocator()) LocationSummary(sub, LocationSummary::kNoCall);
3327 switch (sub->GetResultType()) {
3328 case DataType::Type::kInt32: {
3329 locations->SetInAt(0, Location::RequiresRegister());
3330 locations->SetInAt(1, Location::Any());
3331 locations->SetOut(Location::SameAsFirstInput());
3332 break;
3333 }
3334 case DataType::Type::kInt64: {
3335 locations->SetInAt(0, Location::RequiresRegister());
3336 locations->SetInAt(1, Location::RegisterOrInt32Constant(sub->InputAt(1)));
3337 locations->SetOut(Location::SameAsFirstInput());
3338 break;
3339 }
3340 case DataType::Type::kFloat32:
3341 case DataType::Type::kFloat64: {
3342 locations->SetInAt(0, Location::RequiresFpuRegister());
3343 locations->SetInAt(1, Location::Any());
3344 locations->SetOut(Location::SameAsFirstInput());
3345 break;
3346 }
3347 default:
3348 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3349 }
3350 }
3351
3352 void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) {
3353 LocationSummary* locations = sub->GetLocations();
3354 Location first = locations->InAt(0);
3355 Location second = locations->InAt(1);
3356 DCHECK(first.Equals(locations->Out()));
3357 switch (sub->GetResultType()) {
3358 case DataType::Type::kInt32: {
3359 if (second.IsRegister()) {
3360 __ subl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3361 } else if (second.IsConstant()) {
3362 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
3363 __ subl(first.AsRegister<CpuRegister>(), imm);
3364 } else {
3365 __ subl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
3366 }
3367 break;
3368 }
3369 case DataType::Type::kInt64: {
3370 if (second.IsConstant()) {
3371 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3372 DCHECK(IsInt<32>(value));
3373 __ subq(first.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
3374 } else {
3375 __ subq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3376 }
3377 break;
3378 }
3379
3380 case DataType::Type::kFloat32: {
3381 if (second.IsFpuRegister()) {
3382 __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3383 } else if (second.IsConstant()) {
3384 __ subss(first.AsFpuRegister<XmmRegister>(),
3385 codegen_->LiteralFloatAddress(
3386 second.GetConstant()->AsFloatConstant()->GetValue()));
3387 } else {
3388 DCHECK(second.IsStackSlot());
3389 __ subss(first.AsFpuRegister<XmmRegister>(),
3390 Address(CpuRegister(RSP), second.GetStackIndex()));
3391 }
3392 break;
3393 }
3394
3395 case DataType::Type::kFloat64: {
3396 if (second.IsFpuRegister()) {
3397 __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3398 } else if (second.IsConstant()) {
3399 __ subsd(first.AsFpuRegister<XmmRegister>(),
3400 codegen_->LiteralDoubleAddress(
3401 second.GetConstant()->AsDoubleConstant()->GetValue()));
3402 } else {
3403 DCHECK(second.IsDoubleStackSlot());
3404 __ subsd(first.AsFpuRegister<XmmRegister>(),
3405 Address(CpuRegister(RSP), second.GetStackIndex()));
3406 }
3407 break;
3408 }
3409
3410 default:
3411 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3412 }
3413 }
3414
3415 void LocationsBuilderX86_64::VisitMul(HMul* mul) {
3416 LocationSummary* locations =
3417 new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
3418 switch (mul->GetResultType()) {
3419 case DataType::Type::kInt32: {
3420 locations->SetInAt(0, Location::RequiresRegister());
3421 locations->SetInAt(1, Location::Any());
3422 if (mul->InputAt(1)->IsIntConstant()) {
3423 // Can use 3 operand multiply.
3424 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3425 } else {
3426 locations->SetOut(Location::SameAsFirstInput());
3427 }
3428 break;
3429 }
3430 case DataType::Type::kInt64: {
3431 locations->SetInAt(0, Location::RequiresRegister());
3432 locations->SetInAt(1, Location::Any());
3433 if (mul->InputAt(1)->IsLongConstant() &&
3434 IsInt<32>(mul->InputAt(1)->AsLongConstant()->GetValue())) {
3435 // Can use 3 operand multiply.
3436 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3437 } else {
3438 locations->SetOut(Location::SameAsFirstInput());
3439 }
3440 break;
3441 }
3442 case DataType::Type::kFloat32:
3443 case DataType::Type::kFloat64: {
3444 locations->SetInAt(0, Location::RequiresFpuRegister());
3445 locations->SetInAt(1, Location::Any());
3446 locations->SetOut(Location::SameAsFirstInput());
3447 break;
3448 }
3449
3450 default:
3451 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3452 }
3453 }
3454
3455 void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) {
3456 LocationSummary* locations = mul->GetLocations();
3457 Location first = locations->InAt(0);
3458 Location second = locations->InAt(1);
3459 Location out = locations->Out();
3460 switch (mul->GetResultType()) {
3461 case DataType::Type::kInt32:
3462 // The constant may have ended up in a register, so test explicitly to avoid
3463 // problems where the output may not be the same as the first operand.
3464 if (mul->InputAt(1)->IsIntConstant()) {
3465 Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
3466 __ imull(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), imm);
3467 } else if (second.IsRegister()) {
3468 DCHECK(first.Equals(out));
3469 __ imull(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3470 } else {
3471 DCHECK(first.Equals(out));
3472 DCHECK(second.IsStackSlot());
3473 __ imull(first.AsRegister<CpuRegister>(),
3474 Address(CpuRegister(RSP), second.GetStackIndex()));
3475 }
3476 break;
3477 case DataType::Type::kInt64: {
3478 // The constant may have ended up in a register, so test explicitly to avoid
3479 // problems where the output may not be the same as the first operand.
3480 if (mul->InputAt(1)->IsLongConstant()) {
3481 int64_t value = mul->InputAt(1)->AsLongConstant()->GetValue();
3482 if (IsInt<32>(value)) {
3483 __ imulq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(),
3484 Immediate(static_cast<int32_t>(value)));
3485 } else {
3486 // Have to use the constant area.
3487 DCHECK(first.Equals(out));
3488 __ imulq(first.AsRegister<CpuRegister>(), codegen_->LiteralInt64Address(value));
3489 }
3490 } else if (second.IsRegister()) {
3491 DCHECK(first.Equals(out));
3492 __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3493 } else {
3494 DCHECK(second.IsDoubleStackSlot());
3495 DCHECK(first.Equals(out));
3496 __ imulq(first.AsRegister<CpuRegister>(),
3497 Address(CpuRegister(RSP), second.GetStackIndex()));
3498 }
3499 break;
3500 }
3501
3502 case DataType::Type::kFloat32: {
3503 DCHECK(first.Equals(out));
3504 if (second.IsFpuRegister()) {
3505 __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3506 } else if (second.IsConstant()) {
3507 __ mulss(first.AsFpuRegister<XmmRegister>(),
3508 codegen_->LiteralFloatAddress(
3509 second.GetConstant()->AsFloatConstant()->GetValue()));
3510 } else {
3511 DCHECK(second.IsStackSlot());
3512 __ mulss(first.AsFpuRegister<XmmRegister>(),
3513 Address(CpuRegister(RSP), second.GetStackIndex()));
3514 }
3515 break;
3516 }
3517
3518 case DataType::Type::kFloat64: {
3519 DCHECK(first.Equals(out));
3520 if (second.IsFpuRegister()) {
3521 __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3522 } else if (second.IsConstant()) {
3523 __ mulsd(first.AsFpuRegister<XmmRegister>(),
3524 codegen_->LiteralDoubleAddress(
3525 second.GetConstant()->AsDoubleConstant()->GetValue()));
3526 } else {
3527 DCHECK(second.IsDoubleStackSlot());
3528 __ mulsd(first.AsFpuRegister<XmmRegister>(),
3529 Address(CpuRegister(RSP), second.GetStackIndex()));
3530 }
3531 break;
3532 }
3533
3534 default:
3535 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3536 }
3537 }
3538
3539 void InstructionCodeGeneratorX86_64::PushOntoFPStack(Location source, uint32_t temp_offset,
3540 uint32_t stack_adjustment, bool is_float) {
3541 if (source.IsStackSlot()) {
3542 DCHECK(is_float);
3543 __ flds(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
3544 } else if (source.IsDoubleStackSlot()) {
3545 DCHECK(!is_float);
3546 __ fldl(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
3547 } else {
3548 // Write the value to the temporary location on the stack and load to FP stack.
3549 if (is_float) {
3550 Location stack_temp = Location::StackSlot(temp_offset);
3551 codegen_->Move(stack_temp, source);
3552 __ flds(Address(CpuRegister(RSP), temp_offset));
3553 } else {
3554 Location stack_temp = Location::DoubleStackSlot(temp_offset);
3555 codegen_->Move(stack_temp, source);
3556 __ fldl(Address(CpuRegister(RSP), temp_offset));
3557 }
3558 }
3559 }
3560
3561 void InstructionCodeGeneratorX86_64::GenerateRemFP(HRem *rem) {
3562 DataType::Type type = rem->GetResultType();
3563 bool is_float = type == DataType::Type::kFloat32;
3564 size_t elem_size = DataType::Size(type);
3565 LocationSummary* locations = rem->GetLocations();
3566 Location first = locations->InAt(0);
3567 Location second = locations->InAt(1);
3568 Location out = locations->Out();
3569
3570 // Create stack space for 2 elements.
3571 // TODO: enhance register allocator to ask for stack temporaries.
3572 __ subq(CpuRegister(RSP), Immediate(2 * elem_size));
3573
3574 // Load the values to the FP stack in reverse order, using temporaries if needed.
3575 PushOntoFPStack(second, elem_size, 2 * elem_size, is_float);
3576 PushOntoFPStack(first, 0, 2 * elem_size, is_float);
3577
3578 // Loop doing FPREM until we stabilize.
3579 NearLabel retry;
3580 __ Bind(&retry);
3581 __ fprem();
3582
3583 // Move FP status to AX.
3584 __ fstsw();
3585
3586 // And see if the argument reduction is complete. This is signaled by the
3587 // C2 FPU flag bit set to 0.
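  // kC2ConditionMask (0x400) isolates bit 10 of the FPU status word, i.e. the C2 flag.
  // fstsw placed the status word in AX, so the andl below both clears the other bits and
  // sets ZF according to C2, driving the retry loop.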
3588 __ andl(CpuRegister(RAX), Immediate(kC2ConditionMask));
3589 __ j(kNotEqual, &retry);
3590
3591 // We have settled on the final value. Retrieve it into an XMM register.
3592 // Store FP top of stack to real stack.
3593 if (is_float) {
3594 __ fsts(Address(CpuRegister(RSP), 0));
3595 } else {
3596 __ fstl(Address(CpuRegister(RSP), 0));
3597 }
3598
3599 // Pop the 2 items from the FP stack.
3600 __ fucompp();
3601
3602 // Load the value from the stack into an XMM register.
3603 DCHECK(out.IsFpuRegister()) << out;
3604 if (is_float) {
3605 __ movss(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
3606 } else {
3607 __ movsd(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
3608 }
3609
3610 // And remove the temporary stack space we allocated.
3611 __ addq(CpuRegister(RSP), Immediate(2 * elem_size));
3612 }
3613
3614 void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
3615 DCHECK(instruction->IsDiv() || instruction->IsRem());
3616
3617 LocationSummary* locations = instruction->GetLocations();
3618 Location second = locations->InAt(1);
3619 DCHECK(second.IsConstant());
3620
3621 CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
3622 CpuRegister input_register = locations->InAt(0).AsRegister<CpuRegister>();
3623 int64_t imm = Int64FromConstant(second.GetConstant());
3624
3625 DCHECK(imm == 1 || imm == -1);
3626
3627 switch (instruction->GetResultType()) {
3628 case DataType::Type::kInt32: {
3629 if (instruction->IsRem()) {
3630 __ xorl(output_register, output_register);
3631 } else {
3632 __ movl(output_register, input_register);
3633 if (imm == -1) {
3634 __ negl(output_register);
3635 }
3636 }
3637 break;
3638 }
3639
3640 case DataType::Type::kInt64: {
3641 if (instruction->IsRem()) {
3642 __ xorl(output_register, output_register);
3643 } else {
3644 __ movq(output_register, input_register);
3645 if (imm == -1) {
3646 __ negq(output_register);
3647 }
3648 }
3649 break;
3650 }
3651
3652 default:
3653 LOG(FATAL) << "Unexpected type for div by (-)1 " << instruction->GetResultType();
3654 }
3655 }
3656 void InstructionCodeGeneratorX86_64::RemByPowerOfTwo(HRem* instruction) {
3657 LocationSummary* locations = instruction->GetLocations();
3658 Location second = locations->InAt(1);
3659 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3660 CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
3661 int64_t imm = Int64FromConstant(second.GetConstant());
3662 DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
3663 uint64_t abs_imm = AbsOrMin(imm);
3664 CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
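  // Illustrative C equivalent of both paths below (the remainder keeps the dividend's sign,
  // matching Java's % semantics):
  //   r = numerator & (abs_imm - 1);
  //   if (r != 0 && numerator < 0) r -= abs_imm;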
3665 if (instruction->GetResultType() == DataType::Type::kInt32) {
3666 NearLabel done;
3667 __ movl(out, numerator);
3668 __ andl(out, Immediate(abs_imm-1));
3669 __ j(Condition::kZero, &done);
3670 __ leal(tmp, Address(out, static_cast<int32_t>(~(abs_imm-1))));
3671 __ testl(numerator, numerator);
3672 __ cmov(Condition::kLess, out, tmp, false);
3673 __ Bind(&done);
3674
3675 } else {
3676 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
3677 codegen_->Load64BitValue(tmp, abs_imm - 1);
3678 NearLabel done;
3679
3680 __ movq(out, numerator);
3681 __ andq(out, tmp);
3682 __ j(Condition::kZero, &done);
3683 __ movq(tmp, numerator);
3684 __ sarq(tmp, Immediate(63));
3685 __ shlq(tmp, Immediate(WhichPowerOf2(abs_imm)));
3686 __ orq(out, tmp);
3687 __ Bind(&done);
3688 }
3689 }
3690 void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
3691 LocationSummary* locations = instruction->GetLocations();
3692 Location second = locations->InAt(1);
3693
3694 CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
3695 CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
3696
3697 int64_t imm = Int64FromConstant(second.GetConstant());
3698 DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
3699 uint64_t abs_imm = AbsOrMin(imm);
3700
3701 CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
3702
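  // Illustrative C equivalent of the round-toward-zero division emitted below:
  //   tmp = (numerator < 0) ? numerator + (abs_imm - 1) : numerator;
  //   tmp >>= CTZ(imm);                  // arithmetic shift
  //   result = (imm < 0) ? -tmp : tmp;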
3703 if (instruction->GetResultType() == DataType::Type::kInt32) {
3704     // When the denominator equals 2, we can add the sign bit and the numerator into tmp.
3705     // Below we use an addl instruction instead of cmov, which saves one cycle.
3706 if (abs_imm == 2) {
3707 __ leal(tmp, Address(numerator, 0));
3708 __ shrl(tmp, Immediate(31));
3709 __ addl(tmp, numerator);
3710 } else {
3711 __ leal(tmp, Address(numerator, abs_imm - 1));
3712 __ testl(numerator, numerator);
3713 __ cmov(kGreaterEqual, tmp, numerator);
3714 }
3715 int shift = CTZ(imm);
3716 __ sarl(tmp, Immediate(shift));
3717
3718 if (imm < 0) {
3719 __ negl(tmp);
3720 }
3721
3722 __ movl(output_register, tmp);
3723 } else {
3724 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
3725 CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
3726 if (abs_imm == 2) {
3727 __ movq(rdx, numerator);
3728 __ shrq(rdx, Immediate(63));
3729 __ addq(rdx, numerator);
3730 } else {
3731 codegen_->Load64BitValue(rdx, abs_imm - 1);
3732 __ addq(rdx, numerator);
3733 __ testq(numerator, numerator);
3734 __ cmov(kGreaterEqual, rdx, numerator);
3735 }
3736 int shift = CTZ(imm);
3737 __ sarq(rdx, Immediate(shift));
3738
3739 if (imm < 0) {
3740 __ negq(rdx);
3741 }
3742
3743 __ movq(output_register, rdx);
3744 }
3745 }
3746
3747 void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
3748 DCHECK(instruction->IsDiv() || instruction->IsRem());
3749
3750 LocationSummary* locations = instruction->GetLocations();
3751 Location second = locations->InAt(1);
3752
3753 CpuRegister numerator = instruction->IsDiv() ? locations->GetTemp(1).AsRegister<CpuRegister>()
3754 : locations->GetTemp(0).AsRegister<CpuRegister>();
3755 CpuRegister eax = locations->InAt(0).AsRegister<CpuRegister>();
3756 CpuRegister edx = instruction->IsDiv() ? locations->GetTemp(0).AsRegister<CpuRegister>()
3757 : locations->Out().AsRegister<CpuRegister>();
3758 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3759
3760 DCHECK_EQ(RAX, eax.AsRegister());
3761 DCHECK_EQ(RDX, edx.AsRegister());
3762 if (instruction->IsDiv()) {
3763 DCHECK_EQ(RAX, out.AsRegister());
3764 } else {
3765 DCHECK_EQ(RDX, out.AsRegister());
3766 }
3767
3768 int64_t magic;
3769 int shift;
3770
3771 // TODO: can these branches be written as one?
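  // Illustrative C equivalent of the 32-bit path below (the 64-bit path is analogous),
  // shown for the common case imm > 0 with a negative magic constant:
  //   int32_t hi = (int32_t)(((int64_t) magic * n) >> 32);  // high half of imull, in EDX
  //   hi += n;                                              // correction for magic < 0
  //   hi >>= shift;
  //   int32_t q = hi + ((uint32_t) hi >> 31);               // +1 if negative: round toward zero
  //   // For HRem the remainder is then n - q * imm.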
3772 if (instruction->GetResultType() == DataType::Type::kInt32) {
3773 int imm = second.GetConstant()->AsIntConstant()->GetValue();
3774
3775 CalculateMagicAndShiftForDivRem(imm, false /* is_long= */, &magic, &shift);
3776
3777 __ movl(numerator, eax);
3778
3779 __ movl(eax, Immediate(magic));
3780 __ imull(numerator);
3781
3782 if (imm > 0 && magic < 0) {
3783 __ addl(edx, numerator);
3784 } else if (imm < 0 && magic > 0) {
3785 __ subl(edx, numerator);
3786 }
3787
3788 if (shift != 0) {
3789 __ sarl(edx, Immediate(shift));
3790 }
3791
3792 __ movl(eax, edx);
3793 __ shrl(edx, Immediate(31));
3794 __ addl(edx, eax);
3795
3796 if (instruction->IsRem()) {
3797 __ movl(eax, numerator);
3798 __ imull(edx, Immediate(imm));
3799 __ subl(eax, edx);
3800 __ movl(edx, eax);
3801 } else {
3802 __ movl(eax, edx);
3803 }
3804 } else {
3805 int64_t imm = second.GetConstant()->AsLongConstant()->GetValue();
3806
3807 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
3808
3809 CpuRegister rax = eax;
3810 CpuRegister rdx = edx;
3811
3812 CalculateMagicAndShiftForDivRem(imm, true /* is_long= */, &magic, &shift);
3813
3814 // Save the numerator.
3815 __ movq(numerator, rax);
3816
3817 // RAX = magic
3818 codegen_->Load64BitValue(rax, magic);
3819
3820 // RDX:RAX = magic * numerator
3821 __ imulq(numerator);
3822
3823 if (imm > 0 && magic < 0) {
3824 // RDX += numerator
3825 __ addq(rdx, numerator);
3826 } else if (imm < 0 && magic > 0) {
3827 // RDX -= numerator
3828 __ subq(rdx, numerator);
3829 }
3830
3831 // Shift if needed.
3832 if (shift != 0) {
3833 __ sarq(rdx, Immediate(shift));
3834 }
3835
3836 // RDX += 1 if RDX < 0
3837 __ movq(rax, rdx);
3838 __ shrq(rdx, Immediate(63));
3839 __ addq(rdx, rax);
3840
3841 if (instruction->IsRem()) {
3842 __ movq(rax, numerator);
3843
3844 if (IsInt<32>(imm)) {
3845 __ imulq(rdx, Immediate(static_cast<int32_t>(imm)));
3846 } else {
3847 __ imulq(rdx, codegen_->LiteralInt64Address(imm));
3848 }
3849
3850 __ subq(rax, rdx);
3851 __ movq(rdx, rax);
3852 } else {
3853 __ movq(rax, rdx);
3854 }
3855 }
3856 }
3857
3858 void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
3859 DCHECK(instruction->IsDiv() || instruction->IsRem());
3860 DataType::Type type = instruction->GetResultType();
3861 DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
3862
3863 bool is_div = instruction->IsDiv();
3864 LocationSummary* locations = instruction->GetLocations();
3865
3866 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3867 Location second = locations->InAt(1);
3868
3869 DCHECK_EQ(RAX, locations->InAt(0).AsRegister<CpuRegister>().AsRegister());
3870 DCHECK_EQ(is_div ? RAX : RDX, out.AsRegister());
3871
3872 if (second.IsConstant()) {
3873 int64_t imm = Int64FromConstant(second.GetConstant());
3874
3875 if (imm == 0) {
3876 // Do not generate anything. DivZeroCheck would prevent any code to be executed.
3877 } else if (imm == 1 || imm == -1) {
3878 DivRemOneOrMinusOne(instruction);
3879 } else if (IsPowerOfTwo(AbsOrMin(imm))) {
3880 if (is_div) {
3881 DivByPowerOfTwo(instruction->AsDiv());
3882 } else {
3883 RemByPowerOfTwo(instruction->AsRem());
3884 }
3885 } else {
3886 DCHECK(imm <= -2 || imm >= 2);
3887 GenerateDivRemWithAnyConstant(instruction);
3888 }
3889 } else {
3890 SlowPathCode* slow_path =
3891 new (codegen_->GetScopedAllocator()) DivRemMinusOneSlowPathX86_64(
3892 instruction, out.AsRegister(), type, is_div);
3893 codegen_->AddSlowPath(slow_path);
3894
3895 CpuRegister second_reg = second.AsRegister<CpuRegister>();
3896     // 0x80000000(00000000) / -1 triggers an arithmetic exception!
3897     // Dividing by -1 is actually a negation, and -0x80000000(00000000) == 0x80000000(00000000),
3898     // so it's safe to just use negl instead of more complex comparisons.
3899 if (type == DataType::Type::kInt32) {
3900 __ cmpl(second_reg, Immediate(-1));
3901 __ j(kEqual, slow_path->GetEntryLabel());
3902 // edx:eax <- sign-extended of eax
3903 __ cdq();
3904 // eax = quotient, edx = remainder
3905 __ idivl(second_reg);
3906 } else {
3907 __ cmpq(second_reg, Immediate(-1));
3908 __ j(kEqual, slow_path->GetEntryLabel());
3909 // rdx:rax <- sign-extended of rax
3910 __ cqo();
3911 // rax = quotient, rdx = remainder
3912 __ idivq(second_reg);
3913 }
3914 __ Bind(slow_path->GetExitLabel());
3915 }
3916 }
3917
3918 void LocationsBuilderX86_64::VisitDiv(HDiv* div) {
3919 LocationSummary* locations =
3920 new (GetGraph()->GetAllocator()) LocationSummary(div, LocationSummary::kNoCall);
3921 switch (div->GetResultType()) {
3922 case DataType::Type::kInt32:
3923 case DataType::Type::kInt64: {
3924 locations->SetInAt(0, Location::RegisterLocation(RAX));
3925 locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
3926 locations->SetOut(Location::SameAsFirstInput());
3927 // Intel uses edx:eax as the dividend.
3928 locations->AddTemp(Location::RegisterLocation(RDX));
3929 // We need to save the numerator while we tweak rax and rdx. As we are using imul in a way
3930 // which enforces results to be in RAX and RDX, things are simpler if we use RDX also as
3931 // output and request another temp.
3932 if (div->InputAt(1)->IsConstant()) {
3933 locations->AddTemp(Location::RequiresRegister());
3934 }
3935 break;
3936 }
3937
3938 case DataType::Type::kFloat32:
3939 case DataType::Type::kFloat64: {
3940 locations->SetInAt(0, Location::RequiresFpuRegister());
3941 locations->SetInAt(1, Location::Any());
3942 locations->SetOut(Location::SameAsFirstInput());
3943 break;
3944 }
3945
3946 default:
3947 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3948 }
3949 }
3950
3951 void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) {
3952 LocationSummary* locations = div->GetLocations();
3953 Location first = locations->InAt(0);
3954 Location second = locations->InAt(1);
3955 DCHECK(first.Equals(locations->Out()));
3956
3957 DataType::Type type = div->GetResultType();
3958 switch (type) {
3959 case DataType::Type::kInt32:
3960 case DataType::Type::kInt64: {
3961 GenerateDivRemIntegral(div);
3962 break;
3963 }
3964
3965 case DataType::Type::kFloat32: {
3966 if (second.IsFpuRegister()) {
3967 __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3968 } else if (second.IsConstant()) {
3969 __ divss(first.AsFpuRegister<XmmRegister>(),
3970 codegen_->LiteralFloatAddress(
3971 second.GetConstant()->AsFloatConstant()->GetValue()));
3972 } else {
3973 DCHECK(second.IsStackSlot());
3974 __ divss(first.AsFpuRegister<XmmRegister>(),
3975 Address(CpuRegister(RSP), second.GetStackIndex()));
3976 }
3977 break;
3978 }
3979
3980 case DataType::Type::kFloat64: {
3981 if (second.IsFpuRegister()) {
3982 __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3983 } else if (second.IsConstant()) {
3984 __ divsd(first.AsFpuRegister<XmmRegister>(),
3985 codegen_->LiteralDoubleAddress(
3986 second.GetConstant()->AsDoubleConstant()->GetValue()));
3987 } else {
3988 DCHECK(second.IsDoubleStackSlot());
3989 __ divsd(first.AsFpuRegister<XmmRegister>(),
3990 Address(CpuRegister(RSP), second.GetStackIndex()));
3991 }
3992 break;
3993 }
3994
3995 default:
3996 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3997 }
3998 }
3999
4000 void LocationsBuilderX86_64::VisitRem(HRem* rem) {
4001 DataType::Type type = rem->GetResultType();
4002 LocationSummary* locations =
4003 new (GetGraph()->GetAllocator()) LocationSummary(rem, LocationSummary::kNoCall);
4004
4005 switch (type) {
4006 case DataType::Type::kInt32:
4007 case DataType::Type::kInt64: {
4008 locations->SetInAt(0, Location::RegisterLocation(RAX));
4009 locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
4010 // Intel uses rdx:rax as the dividend and puts the remainder in rdx
4011 locations->SetOut(Location::RegisterLocation(RDX));
4012 // We need to save the numerator while we tweak eax and edx. As we are using imul in a way
4013 // which enforces results to be in RAX and RDX, things are simpler if we use EAX also as
4014 // output and request another temp.
4015 if (rem->InputAt(1)->IsConstant()) {
4016 locations->AddTemp(Location::RequiresRegister());
4017 }
4018 break;
4019 }
4020
4021 case DataType::Type::kFloat32:
4022 case DataType::Type::kFloat64: {
4023 locations->SetInAt(0, Location::Any());
4024 locations->SetInAt(1, Location::Any());
4025 locations->SetOut(Location::RequiresFpuRegister());
4026 locations->AddTemp(Location::RegisterLocation(RAX));
4027 break;
4028 }
4029
4030 default:
4031 LOG(FATAL) << "Unexpected rem type " << type;
4032 }
4033 }
4034
4035 void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) {
4036 DataType::Type type = rem->GetResultType();
4037 switch (type) {
4038 case DataType::Type::kInt32:
4039 case DataType::Type::kInt64: {
4040 GenerateDivRemIntegral(rem);
4041 break;
4042 }
4043 case DataType::Type::kFloat32:
4044 case DataType::Type::kFloat64: {
4045 GenerateRemFP(rem);
4046 break;
4047 }
4048 default:
4049 LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
4050 }
4051 }
4052
4053 static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
4054 LocationSummary* locations = new (allocator) LocationSummary(minmax);
4055 switch (minmax->GetResultType()) {
4056 case DataType::Type::kInt32:
4057 case DataType::Type::kInt64:
4058 locations->SetInAt(0, Location::RequiresRegister());
4059 locations->SetInAt(1, Location::RequiresRegister());
4060 locations->SetOut(Location::SameAsFirstInput());
4061 break;
4062 case DataType::Type::kFloat32:
4063 case DataType::Type::kFloat64:
4064 locations->SetInAt(0, Location::RequiresFpuRegister());
4065 locations->SetInAt(1, Location::RequiresFpuRegister());
4066 // The following is sub-optimal, but all we can do for now. It would be fine to also accept
4067 // the second input to be the output (we can simply swap inputs).
4068 locations->SetOut(Location::SameAsFirstInput());
4069 break;
4070 default:
4071 LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
4072 }
4073 }
4074
4075 void InstructionCodeGeneratorX86_64::GenerateMinMaxInt(LocationSummary* locations,
4076 bool is_min,
4077 DataType::Type type) {
4078 Location op1_loc = locations->InAt(0);
4079 Location op2_loc = locations->InAt(1);
4080
4081 // Shortcut for same input locations.
4082 if (op1_loc.Equals(op2_loc)) {
4083 // Can return immediately, as op1_loc == out_loc.
4084 // Note: if we ever support separate registers, e.g., output into memory, we need to check for
4085 // a copy here.
4086 DCHECK(locations->Out().Equals(op1_loc));
4087 return;
4088 }
4089
4090 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4091 CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();
4092
4093 // (out := op1)
4094 // out <=? op2
4095 // if out is min jmp done
4096 // out := op2
4097 // done:
4098
4099 if (type == DataType::Type::kInt64) {
4100 __ cmpq(out, op2);
4101 __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ true);
4102 } else {
4103 DCHECK_EQ(type, DataType::Type::kInt32);
4104 __ cmpl(out, op2);
4105 __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ false);
4106 }
4107 }
4108
4109 void InstructionCodeGeneratorX86_64::GenerateMinMaxFP(LocationSummary* locations,
4110 bool is_min,
4111 DataType::Type type) {
4112 Location op1_loc = locations->InAt(0);
4113 Location op2_loc = locations->InAt(1);
4114 Location out_loc = locations->Out();
4115 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
4116
4117 // Shortcut for same input locations.
4118 if (op1_loc.Equals(op2_loc)) {
4119 DCHECK(out_loc.Equals(op1_loc));
4120 return;
4121 }
4122
4123 // (out := op1)
4124 // out <=? op2
4125 // if Nan jmp Nan_label
4126 // if out is min jmp done
4127 // if op2 is min jmp op2_label
4128 // handle -0/+0
4129 // jmp done
4130 // Nan_label:
4131 // out := NaN
4132 // op2_label:
4133 // out := op2
4134 // done:
4135 //
4136 // This removes one jmp, but needs to copy one input (op1) to out.
4137 //
4138 // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?
4139
4140 XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
4141
4142 NearLabel nan, done, op2_label;
4143 if (type == DataType::Type::kFloat64) {
4144 __ ucomisd(out, op2);
4145 } else {
4146 DCHECK_EQ(type, DataType::Type::kFloat32);
4147 __ ucomiss(out, op2);
4148 }
4149
4150 __ j(Condition::kParityEven, &nan);
4151
4152 __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
4153 __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
4154
4155 // Handle 0.0/-0.0.
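  // When the operands compare equal (including +0.0 vs -0.0) neither jump above is taken and
  // we merge the sign bits instead: OR yields -0.0 for min (a set sign bit wins), AND yields
  // +0.0 for max. For equal non-zero operands, OR/AND of identical bit patterns is a no-op.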
4156 if (is_min) {
4157 if (type == DataType::Type::kFloat64) {
4158 __ orpd(out, op2);
4159 } else {
4160 __ orps(out, op2);
4161 }
4162 } else {
4163 if (type == DataType::Type::kFloat64) {
4164 __ andpd(out, op2);
4165 } else {
4166 __ andps(out, op2);
4167 }
4168 }
4169 __ jmp(&done);
4170
4171 // NaN handling.
4172 __ Bind(&nan);
4173 if (type == DataType::Type::kFloat64) {
4174 __ movsd(out, codegen_->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
4175 } else {
4176 __ movss(out, codegen_->LiteralInt32Address(INT32_C(0x7FC00000)));
4177 }
4178 __ jmp(&done);
4179
4180 // out := op2;
4181 __ Bind(&op2_label);
4182 if (type == DataType::Type::kFloat64) {
4183 __ movsd(out, op2);
4184 } else {
4185 __ movss(out, op2);
4186 }
4187
4188 // Done.
4189 __ Bind(&done);
4190 }
4191
4192 void InstructionCodeGeneratorX86_64::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
4193 DataType::Type type = minmax->GetResultType();
4194 switch (type) {
4195 case DataType::Type::kInt32:
4196 case DataType::Type::kInt64:
4197 GenerateMinMaxInt(minmax->GetLocations(), is_min, type);
4198 break;
4199 case DataType::Type::kFloat32:
4200 case DataType::Type::kFloat64:
4201 GenerateMinMaxFP(minmax->GetLocations(), is_min, type);
4202 break;
4203 default:
4204 LOG(FATAL) << "Unexpected type for HMinMax " << type;
4205 }
4206 }
4207
4208 void LocationsBuilderX86_64::VisitMin(HMin* min) {
4209 CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
4210 }
4211
4212 void InstructionCodeGeneratorX86_64::VisitMin(HMin* min) {
4213 GenerateMinMax(min, /*is_min*/ true);
4214 }
4215
4216 void LocationsBuilderX86_64::VisitMax(HMax* max) {
4217 CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
4218 }
4219
4220 void InstructionCodeGeneratorX86_64::VisitMax(HMax* max) {
4221 GenerateMinMax(max, /*is_min*/ false);
4222 }
4223
4224 void LocationsBuilderX86_64::VisitAbs(HAbs* abs) {
4225 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
4226 switch (abs->GetResultType()) {
4227 case DataType::Type::kInt32:
4228 case DataType::Type::kInt64:
4229 locations->SetInAt(0, Location::RequiresRegister());
4230 locations->SetOut(Location::SameAsFirstInput());
4231 locations->AddTemp(Location::RequiresRegister());
4232 break;
4233 case DataType::Type::kFloat32:
4234 case DataType::Type::kFloat64:
4235 locations->SetInAt(0, Location::RequiresFpuRegister());
4236 locations->SetOut(Location::SameAsFirstInput());
4237 locations->AddTemp(Location::RequiresFpuRegister());
4238 break;
4239 default:
4240 LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4241 }
4242 }
4243
4244 void InstructionCodeGeneratorX86_64::VisitAbs(HAbs* abs) {
4245 LocationSummary* locations = abs->GetLocations();
4246 switch (abs->GetResultType()) {
4247 case DataType::Type::kInt32: {
4248 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4249 CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();
4250 // Create mask.
4251 __ movl(mask, out);
4252 __ sarl(mask, Immediate(31));
4253 // Add mask.
4254 __ addl(out, mask);
4255 __ xorl(out, mask);
4256 break;
4257 }
4258 case DataType::Type::kInt64: {
4259 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4260 CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();
4261 // Create mask.
4262 __ movq(mask, out);
4263 __ sarq(mask, Immediate(63));
4264 // Add mask.
4265 __ addq(out, mask);
4266 __ xorq(out, mask);
4267 break;
4268 }
4269 case DataType::Type::kFloat32: {
4270 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4271 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4272 __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
4273 __ andps(out, mask);
4274 break;
4275 }
4276 case DataType::Type::kFloat64: {
4277 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4278 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4279 __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
4280 __ andpd(out, mask);
4281 break;
4282 }
4283 default:
4284 LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4285 }
4286 }
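// Worked example for the branch-free integer abs above (illustrative): for out = -5,
// mask = out >> 31 = 0xFFFFFFFF (-1), out + mask = -6, and (-6) ^ mask = 5. For
// out >= 0, mask = 0 and the add/xor pair is a no-op. As with Java's Math.abs, the most
// negative value maps to itself. The floating-point cases clear the IEEE-754 sign bit
// by ANDing with 0x7FFFFFFF (float) or 0x7FFFFFFFFFFFFFFF (double).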
4287
4288 void LocationsBuilderX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4289 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
4290 locations->SetInAt(0, Location::Any());
4291 }
4292
4293 void InstructionCodeGeneratorX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4294 SlowPathCode* slow_path =
4295 new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathX86_64(instruction);
4296 codegen_->AddSlowPath(slow_path);
4297
4298 LocationSummary* locations = instruction->GetLocations();
4299 Location value = locations->InAt(0);
4300
4301 switch (instruction->GetType()) {
4302 case DataType::Type::kBool:
4303 case DataType::Type::kUint8:
4304 case DataType::Type::kInt8:
4305 case DataType::Type::kUint16:
4306 case DataType::Type::kInt16:
4307 case DataType::Type::kInt32: {
4308 if (value.IsRegister()) {
4309 __ testl(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
4310 __ j(kEqual, slow_path->GetEntryLabel());
4311 } else if (value.IsStackSlot()) {
4312 __ cmpl(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
4313 __ j(kEqual, slow_path->GetEntryLabel());
4314 } else {
4315 DCHECK(value.IsConstant()) << value;
4316 if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
4317 __ jmp(slow_path->GetEntryLabel());
4318 }
4319 }
4320 break;
4321 }
4322 case DataType::Type::kInt64: {
4323 if (value.IsRegister()) {
4324 __ testq(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
4325 __ j(kEqual, slow_path->GetEntryLabel());
4326 } else if (value.IsDoubleStackSlot()) {
4327 __ cmpq(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
4328 __ j(kEqual, slow_path->GetEntryLabel());
4329 } else {
4330 DCHECK(value.IsConstant()) << value;
4331 if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
4332 __ jmp(slow_path->GetEntryLabel());
4333 }
4334 }
4335 break;
4336 }
4337 default:
4338 LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
4339 }
4340 }
4341
4342 void LocationsBuilderX86_64::HandleShift(HBinaryOperation* op) {
4343 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4344
4345 LocationSummary* locations =
4346 new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall);
4347
4348 switch (op->GetResultType()) {
4349 case DataType::Type::kInt32:
4350 case DataType::Type::kInt64: {
4351 locations->SetInAt(0, Location::RequiresRegister());
4352 // The shift count needs to be in CL.
4353 locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, op->InputAt(1)));
4354 locations->SetOut(Location::SameAsFirstInput());
4355 break;
4356 }
4357 default:
4358 LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
4359 }
4360 }
4361
4362 void InstructionCodeGeneratorX86_64::HandleShift(HBinaryOperation* op) {
4363 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4364
4365 LocationSummary* locations = op->GetLocations();
4366 CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
4367 Location second = locations->InAt(1);
4368
4369 switch (op->GetResultType()) {
4370 case DataType::Type::kInt32: {
4371 if (second.IsRegister()) {
4372 CpuRegister second_reg = second.AsRegister<CpuRegister>();
4373 if (op->IsShl()) {
4374 __ shll(first_reg, second_reg);
4375 } else if (op->IsShr()) {
4376 __ sarl(first_reg, second_reg);
4377 } else {
4378 __ shrl(first_reg, second_reg);
4379 }
4380 } else {
4381 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
4382 if (op->IsShl()) {
4383 __ shll(first_reg, imm);
4384 } else if (op->IsShr()) {
4385 __ sarl(first_reg, imm);
4386 } else {
4387 __ shrl(first_reg, imm);
4388 }
4389 }
4390 break;
4391 }
4392 case DataType::Type::kInt64: {
4393 if (second.IsRegister()) {
4394 CpuRegister second_reg = second.AsRegister<CpuRegister>();
4395 if (op->IsShl()) {
4396 __ shlq(first_reg, second_reg);
4397 } else if (op->IsShr()) {
4398 __ sarq(first_reg, second_reg);
4399 } else {
4400 __ shrq(first_reg, second_reg);
4401 }
4402 } else {
4403 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
4404 if (op->IsShl()) {
4405 __ shlq(first_reg, imm);
4406 } else if (op->IsShr()) {
4407 __ sarq(first_reg, imm);
4408 } else {
4409 __ shrq(first_reg, imm);
4410 }
4411 }
4412 break;
4413 }
4414 default:
4415 LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
4416 UNREACHABLE();
4417 }
4418 }
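// Note on the constant masking above (illustrative): Java shifts use only the low
// 5 bits (int) or 6 bits (long) of the shift count, which matches what the x86
// shl/sar/shr instructions do with a count in CL. Masking the immediate with
// kMaxIntShiftDistance (31) or kMaxLongShiftDistance (63) keeps the encoded constant
// in range, so e.g. an int shift by 33 is emitted as a shift by 1.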
4419
4420 void LocationsBuilderX86_64::VisitRor(HRor* ror) {
4421 LocationSummary* locations =
4422 new (GetGraph()->GetAllocator()) LocationSummary(ror, LocationSummary::kNoCall);
4423
4424 switch (ror->GetResultType()) {
4425 case DataType::Type::kInt32:
4426 case DataType::Type::kInt64: {
4427 locations->SetInAt(0, Location::RequiresRegister());
4428 // The shift count needs to be in CL (unless it is a constant).
4429 locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, ror->InputAt(1)));
4430 locations->SetOut(Location::SameAsFirstInput());
4431 break;
4432 }
4433 default:
4434 LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
4435 UNREACHABLE();
4436 }
4437 }
4438
4439 void InstructionCodeGeneratorX86_64::VisitRor(HRor* ror) {
4440 LocationSummary* locations = ror->GetLocations();
4441 CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
4442 Location second = locations->InAt(1);
4443
4444 switch (ror->GetResultType()) {
4445 case DataType::Type::kInt32:
4446 if (second.IsRegister()) {
4447 CpuRegister second_reg = second.AsRegister<CpuRegister>();
4448 __ rorl(first_reg, second_reg);
4449 } else {
4450 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
4451 __ rorl(first_reg, imm);
4452 }
4453 break;
4454 case DataType::Type::kInt64:
4455 if (second.IsRegister()) {
4456 CpuRegister second_reg = second.AsRegister<CpuRegister>();
4457 __ rorq(first_reg, second_reg);
4458 } else {
4459 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
4460 __ rorq(first_reg, imm);
4461 }
4462 break;
4463 default:
4464 LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
4465 UNREACHABLE();
4466 }
4467 }
4468
4469 void LocationsBuilderX86_64::VisitShl(HShl* shl) {
4470 HandleShift(shl);
4471 }
4472
4473 void InstructionCodeGeneratorX86_64::VisitShl(HShl* shl) {
4474 HandleShift(shl);
4475 }
4476
4477 void LocationsBuilderX86_64::VisitShr(HShr* shr) {
4478 HandleShift(shr);
4479 }
4480
4481 void InstructionCodeGeneratorX86_64::VisitShr(HShr* shr) {
4482 HandleShift(shr);
4483 }
4484
4485 void LocationsBuilderX86_64::VisitUShr(HUShr* ushr) {
4486 HandleShift(ushr);
4487 }
4488
4489 void InstructionCodeGeneratorX86_64::VisitUShr(HUShr* ushr) {
4490 HandleShift(ushr);
4491 }
4492
4493 void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
4494 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
4495 instruction, LocationSummary::kCallOnMainOnly);
4496 InvokeRuntimeCallingConvention calling_convention;
4497 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
4498 locations->SetOut(Location::RegisterLocation(RAX));
4499 }
4500
4501 void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) {
4502 codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
4503 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
4504 DCHECK(!codegen_->IsLeafMethod());
4505 }
4506
4507 void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) {
4508 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
4509 instruction, LocationSummary::kCallOnMainOnly);
4510 InvokeRuntimeCallingConvention calling_convention;
4511 locations->SetOut(Location::RegisterLocation(RAX));
4512 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
4513 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
4514 }
4515
4516 void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) {
4517 // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
4518 QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
4519 codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
4520 CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
4521 DCHECK(!codegen_->IsLeafMethod());
4522 }
4523
4524 void LocationsBuilderX86_64::VisitParameterValue(HParameterValue* instruction) {
4525 LocationSummary* locations =
4526 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4527 Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
4528 if (location.IsStackSlot()) {
4529 location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
4530 } else if (location.IsDoubleStackSlot()) {
4531 location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
4532 }
4533 locations->SetOut(location);
4534 }
4535
4536 void InstructionCodeGeneratorX86_64::VisitParameterValue(
4537 HParameterValue* instruction ATTRIBUTE_UNUSED) {
4538 // Nothing to do, the parameter is already at its location.
4539 }
4540
4541 void LocationsBuilderX86_64::VisitCurrentMethod(HCurrentMethod* instruction) {
4542 LocationSummary* locations =
4543 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4544 locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument));
4545 }
4546
4547 void InstructionCodeGeneratorX86_64::VisitCurrentMethod(
4548 HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
4549 // Nothing to do, the method is already at its location.
4550 }
4551
4552 void LocationsBuilderX86_64::VisitClassTableGet(HClassTableGet* instruction) {
4553 LocationSummary* locations =
4554 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4555 locations->SetInAt(0, Location::RequiresRegister());
4556 locations->SetOut(Location::RequiresRegister());
4557 }
4558
4559 void InstructionCodeGeneratorX86_64::VisitClassTableGet(HClassTableGet* instruction) {
4560 LocationSummary* locations = instruction->GetLocations();
4561 if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
4562 uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
4563 instruction->GetIndex(), kX86_64PointerSize).SizeValue();
4564 __ movq(locations->Out().AsRegister<CpuRegister>(),
4565 Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset));
4566 } else {
4567 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
4568 instruction->GetIndex(), kX86_64PointerSize));
4569 __ movq(locations->Out().AsRegister<CpuRegister>(),
4570 Address(locations->InAt(0).AsRegister<CpuRegister>(),
4571 mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
4572 __ movq(locations->Out().AsRegister<CpuRegister>(),
4573 Address(locations->Out().AsRegister<CpuRegister>(), method_offset));
4574 }
4575 }
4576
4577 void LocationsBuilderX86_64::VisitNot(HNot* not_) {
4578 LocationSummary* locations =
4579 new (GetGraph()->GetAllocator()) LocationSummary(not_, LocationSummary::kNoCall);
4580 locations->SetInAt(0, Location::RequiresRegister());
4581 locations->SetOut(Location::SameAsFirstInput());
4582 }
4583
4584 void InstructionCodeGeneratorX86_64::VisitNot(HNot* not_) {
4585 LocationSummary* locations = not_->GetLocations();
4586 DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
4587 locations->Out().AsRegister<CpuRegister>().AsRegister());
4588 Location out = locations->Out();
4589 switch (not_->GetResultType()) {
4590 case DataType::Type::kInt32:
4591 __ notl(out.AsRegister<CpuRegister>());
4592 break;
4593
4594 case DataType::Type::kInt64:
4595 __ notq(out.AsRegister<CpuRegister>());
4596 break;
4597
4598 default:
4599 LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
4600 }
4601 }
4602
4603 void LocationsBuilderX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
4604 LocationSummary* locations =
4605 new (GetGraph()->GetAllocator()) LocationSummary(bool_not, LocationSummary::kNoCall);
4606 locations->SetInAt(0, Location::RequiresRegister());
4607 locations->SetOut(Location::SameAsFirstInput());
4608 }
4609
4610 void InstructionCodeGeneratorX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
4611 LocationSummary* locations = bool_not->GetLocations();
4612 DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
4613 locations->Out().AsRegister<CpuRegister>().AsRegister());
4614 Location out = locations->Out();
4615 __ xorl(out.AsRegister<CpuRegister>(), Immediate(1));
4616 }
4617
4618 void LocationsBuilderX86_64::VisitPhi(HPhi* instruction) {
4619 LocationSummary* locations =
4620 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4621 for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
4622 locations->SetInAt(i, Location::Any());
4623 }
4624 locations->SetOut(Location::Any());
4625 }
4626
4627 void InstructionCodeGeneratorX86_64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
4628 LOG(FATAL) << "Unimplemented";
4629 }
4630
4631 void CodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) {
4632 /*
4633 * According to the JSR-133 Cookbook, on x86-64 only StoreLoad/AnyAny barriers need a memory fence.
4634 * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86-64 memory model.
4635 * For those cases, all we need to ensure is that there is a scheduling barrier in place.
4636 */
4637 switch (kind) {
4638 case MemBarrierKind::kAnyAny: {
4639 MemoryFence();
4640 break;
4641 }
4642 case MemBarrierKind::kAnyStore:
4643 case MemBarrierKind::kLoadAny:
4644 case MemBarrierKind::kStoreStore: {
4645 // nop
4646 break;
4647 }
4648 case MemBarrierKind::kNTStoreStore:
4649 // Non-Temporal Store/Store needs an explicit fence.
4650 MemoryFence(/* non-temporal= */ true);
4651 break;
4652 }
4653 }
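// Illustrative use of the barrier kinds above, assuming the usual JSR-133 lowering of
// a volatile field store (see HandleFieldSet below):
//   GenerateMemoryBarrier(kAnyStore);  // no-op on x86-64
//   <store instruction>
//   GenerateMemoryBarrier(kAnyAny);    // MemoryFence(), e.g. mfence or a locked RMW
// A volatile load only needs a kLoadAny barrier after it, which is also a no-op here;
// only the StoreLoad (kAnyAny) case costs an actual fence instruction.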
4654
4655 void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) {
4656 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
4657
4658 bool object_field_get_with_read_barrier =
4659 kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
4660 LocationSummary* locations =
4661 new (GetGraph()->GetAllocator()) LocationSummary(instruction,
4662 object_field_get_with_read_barrier
4663 ? LocationSummary::kCallOnSlowPath
4664 : LocationSummary::kNoCall);
4665 if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
4666 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
4667 }
4668 locations->SetInAt(0, Location::RequiresRegister());
4669 if (DataType::IsFloatingPointType(instruction->GetType())) {
4670 locations->SetOut(Location::RequiresFpuRegister());
4671 } else {
4672 // The output overlaps for an object field get when read barriers
4673 // are enabled: we do not want the move to overwrite the object's
4674 // location, as we need it to emit the read barrier.
4675 locations->SetOut(
4676 Location::RequiresRegister(),
4677 object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
4678 }
4679 }
4680
4681 void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
4682 const FieldInfo& field_info) {
4683 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
4684
4685 LocationSummary* locations = instruction->GetLocations();
4686 Location base_loc = locations->InAt(0);
4687 CpuRegister base = base_loc.AsRegister<CpuRegister>();
4688 Location out = locations->Out();
4689 bool is_volatile = field_info.IsVolatile();
4690 DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
4691 DataType::Type load_type = instruction->GetType();
4692 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
4693
4694 switch (load_type) {
4695 case DataType::Type::kBool:
4696 case DataType::Type::kUint8: {
4697 __ movzxb(out.AsRegister<CpuRegister>(), Address(base, offset));
4698 break;
4699 }
4700
4701 case DataType::Type::kInt8: {
4702 __ movsxb(out.AsRegister<CpuRegister>(), Address(base, offset));
4703 break;
4704 }
4705
4706 case DataType::Type::kUint16: {
4707 __ movzxw(out.AsRegister<CpuRegister>(), Address(base, offset));
4708 break;
4709 }
4710
4711 case DataType::Type::kInt16: {
4712 __ movsxw(out.AsRegister<CpuRegister>(), Address(base, offset));
4713 break;
4714 }
4715
4716 case DataType::Type::kInt32: {
4717 __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
4718 break;
4719 }
4720
4721 case DataType::Type::kReference: {
4722 // /* HeapReference<Object> */ out = *(base + offset)
4723 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
4724 // Note that a potential implicit null check is handled in this
4725 // CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call.
4726 codegen_->GenerateFieldLoadWithBakerReadBarrier(
4727 instruction, out, base, offset, /* needs_null_check= */ true);
4728 if (is_volatile) {
4729 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4730 }
4731 } else {
4732 __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
4733 codegen_->MaybeRecordImplicitNullCheck(instruction);
4734 if (is_volatile) {
4735 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4736 }
4737 // If read barriers are enabled, emit read barriers other than
4738 // Baker's using a slow path (and also unpoison the loaded
4739 // reference, if heap poisoning is enabled).
4740 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
4741 }
4742 break;
4743 }
4744
4745 case DataType::Type::kInt64: {
4746 __ movq(out.AsRegister<CpuRegister>(), Address(base, offset));
4747 break;
4748 }
4749
4750 case DataType::Type::kFloat32: {
4751 __ movss(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
4752 break;
4753 }
4754
4755 case DataType::Type::kFloat64: {
4756 __ movsd(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
4757 break;
4758 }
4759
4760 case DataType::Type::kUint32:
4761 case DataType::Type::kUint64:
4762 case DataType::Type::kVoid:
4763 LOG(FATAL) << "Unreachable type " << load_type;
4764 UNREACHABLE();
4765 }
4766
4767 if (load_type == DataType::Type::kReference) {
4768 // Potential implicit null checks, in the case of reference
4769 // fields, are handled in the previous switch statement.
4770 } else {
4771 codegen_->MaybeRecordImplicitNullCheck(instruction);
4772 }
4773
4774 if (is_volatile) {
4775 if (load_type == DataType::Type::kReference) {
4776 // Memory barriers, in the case of references, are also handled
4777 // in the previous switch statement.
4778 } else {
4779 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4780 }
4781 }
4782 }
4783
4784 void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction,
4785 const FieldInfo& field_info) {
4786 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
4787
4788 LocationSummary* locations =
4789 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4790 DataType::Type field_type = field_info.GetFieldType();
4791 bool is_volatile = field_info.IsVolatile();
4792 bool needs_write_barrier =
4793 CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
4794
4795 locations->SetInAt(0, Location::RequiresRegister());
4796 if (DataType::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
4797 if (is_volatile) {
4798 // In order to satisfy the semantics of volatile, this must be a single instruction store.
4799 locations->SetInAt(1, Location::FpuRegisterOrInt32Constant(instruction->InputAt(1)));
4800 } else {
4801 locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
4802 }
4803 } else {
4804 if (is_volatile) {
4805 // In order to satisfy the semantics of volatile, this must be a single instruction store.
4806 locations->SetInAt(1, Location::RegisterOrInt32Constant(instruction->InputAt(1)));
4807 } else {
4808 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
4809 }
4810 }
4811 if (needs_write_barrier) {
4812 // Temporary registers for the write barrier.
4813 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
4814 locations->AddTemp(Location::RequiresRegister());
4815 } else if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
4816 // Temporary register for the reference poisoning.
4817 locations->AddTemp(Location::RequiresRegister());
4818 }
4819 }
4820
4821 void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
4822 const FieldInfo& field_info,
4823 bool value_can_be_null) {
4824 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
4825
4826 LocationSummary* locations = instruction->GetLocations();
4827 CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>();
4828 Location value = locations->InAt(1);
4829 bool is_volatile = field_info.IsVolatile();
4830 DataType::Type field_type = field_info.GetFieldType();
4831 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
4832
4833 if (is_volatile) {
4834 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
4835 }
4836
4837 bool maybe_record_implicit_null_check_done = false;
4838
4839 switch (field_type) {
4840 case DataType::Type::kBool:
4841 case DataType::Type::kUint8:
4842 case DataType::Type::kInt8: {
4843 if (value.IsConstant()) {
4844 __ movb(Address(base, offset),
4845 Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
4846 } else {
4847 __ movb(Address(base, offset), value.AsRegister<CpuRegister>());
4848 }
4849 break;
4850 }
4851
4852 case DataType::Type::kUint16:
4853 case DataType::Type::kInt16: {
4854 if (value.IsConstant()) {
4855 __ movw(Address(base, offset),
4856 Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
4857 } else {
4858 __ movw(Address(base, offset), value.AsRegister<CpuRegister>());
4859 }
4860 break;
4861 }
4862
4863 case DataType::Type::kInt32:
4864 case DataType::Type::kReference: {
4865 if (value.IsConstant()) {
4866 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
4867 // `field_type == DataType::Type::kReference` implies `v == 0`.
4868 DCHECK((field_type != DataType::Type::kReference) || (v == 0));
4869 // Note: if heap poisoning is enabled, no need to poison
4870 // (negate) `v` if it is a reference, as it would be null.
4871 __ movl(Address(base, offset), Immediate(v));
4872 } else {
4873 if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
4874 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
4875 __ movl(temp, value.AsRegister<CpuRegister>());
4876 __ PoisonHeapReference(temp);
4877 __ movl(Address(base, offset), temp);
4878 } else {
4879 __ movl(Address(base, offset), value.AsRegister<CpuRegister>());
4880 }
4881 }
4882 break;
4883 }
4884
4885 case DataType::Type::kInt64: {
4886 if (value.IsConstant()) {
4887 int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
4888 codegen_->MoveInt64ToAddress(Address(base, offset),
4889 Address(base, offset + sizeof(int32_t)),
4890 v,
4891 instruction);
4892 maybe_record_implicit_null_check_done = true;
4893 } else {
4894 __ movq(Address(base, offset), value.AsRegister<CpuRegister>());
4895 }
4896 break;
4897 }
4898
4899 case DataType::Type::kFloat32: {
4900 if (value.IsConstant()) {
4901 int32_t v =
4902 bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
4903 __ movl(Address(base, offset), Immediate(v));
4904 } else {
4905 __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>());
4906 }
4907 break;
4908 }
4909
4910 case DataType::Type::kFloat64: {
4911 if (value.IsConstant()) {
4912 int64_t v =
4913 bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
4914 codegen_->MoveInt64ToAddress(Address(base, offset),
4915 Address(base, offset + sizeof(int32_t)),
4916 v,
4917 instruction);
4918 maybe_record_implicit_null_check_done = true;
4919 } else {
4920 __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>());
4921 }
4922 break;
4923 }
4924
4925 case DataType::Type::kUint32:
4926 case DataType::Type::kUint64:
4927 case DataType::Type::kVoid:
4928 LOG(FATAL) << "Unreachable type " << field_type;
4929 UNREACHABLE();
4930 }
4931
4932 if (!maybe_record_implicit_null_check_done) {
4933 codegen_->MaybeRecordImplicitNullCheck(instruction);
4934 }
4935
4936 if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
4937 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
4938 CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
4939 codegen_->MarkGCCard(temp, card, base, value.AsRegister<CpuRegister>(), value_can_be_null);
4940 }
4941
4942 if (is_volatile) {
4943 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
4944 }
4945 }
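// Illustrative consequence of the volatile locations chosen in HandleFieldSet above:
// for a volatile Int64 field, a constant that does not fit in 32 bits is forced into
// a register, so the store is a single movq. On the non-volatile path the constant may
// instead go through MoveInt64ToAddress, which can split it into two 32-bit stores;
// such a split store would not be atomic and would violate volatile semantics.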
4946
4947 void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
4948 HandleFieldSet(instruction, instruction->GetFieldInfo());
4949 }
4950
4951 void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
4952 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
4953 }
4954
4955 void LocationsBuilderX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
4956 HandleFieldGet(instruction);
4957 }
4958
4959 void InstructionCodeGeneratorX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
4960 HandleFieldGet(instruction, instruction->GetFieldInfo());
4961 }
4962
4963 void LocationsBuilderX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
4964 HandleFieldGet(instruction);
4965 }
4966
4967 void InstructionCodeGeneratorX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
4968 HandleFieldGet(instruction, instruction->GetFieldInfo());
4969 }
4970
4971 void LocationsBuilderX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
4972 HandleFieldSet(instruction, instruction->GetFieldInfo());
4973 }
4974
4975 void InstructionCodeGeneratorX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
4976 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
4977 }
4978
4979 void LocationsBuilderX86_64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
4980 codegen_->CreateStringBuilderAppendLocations(instruction, Location::RegisterLocation(RAX));
4981 }
4982
4983 void InstructionCodeGeneratorX86_64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
4984 __ movl(CpuRegister(RDI), Immediate(instruction->GetFormat()->GetValue()));
4985 codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc());
4986 }
4987
4988 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldGet(
4989 HUnresolvedInstanceFieldGet* instruction) {
4990 FieldAccessCallingConventionX86_64 calling_convention;
4991 codegen_->CreateUnresolvedFieldLocationSummary(
4992 instruction, instruction->GetFieldType(), calling_convention);
4993 }
4994
4995 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldGet(
4996 HUnresolvedInstanceFieldGet* instruction) {
4997 FieldAccessCallingConventionX86_64 calling_convention;
4998 codegen_->GenerateUnresolvedFieldAccess(instruction,
4999 instruction->GetFieldType(),
5000 instruction->GetFieldIndex(),
5001 instruction->GetDexPc(),
5002 calling_convention);
5003 }
5004
5005 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldSet(
5006 HUnresolvedInstanceFieldSet* instruction) {
5007 FieldAccessCallingConventionX86_64 calling_convention;
5008 codegen_->CreateUnresolvedFieldLocationSummary(
5009 instruction, instruction->GetFieldType(), calling_convention);
5010 }
5011
5012 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldSet(
5013 HUnresolvedInstanceFieldSet* instruction) {
5014 FieldAccessCallingConventionX86_64 calling_convention;
5015 codegen_->GenerateUnresolvedFieldAccess(instruction,
5016 instruction->GetFieldType(),
5017 instruction->GetFieldIndex(),
5018 instruction->GetDexPc(),
5019 calling_convention);
5020 }
5021
5022 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldGet(
5023 HUnresolvedStaticFieldGet* instruction) {
5024 FieldAccessCallingConventionX86_64 calling_convention;
5025 codegen_->CreateUnresolvedFieldLocationSummary(
5026 instruction, instruction->GetFieldType(), calling_convention);
5027 }
5028
5029 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldGet(
5030 HUnresolvedStaticFieldGet* instruction) {
5031 FieldAccessCallingConventionX86_64 calling_convention;
5032 codegen_->GenerateUnresolvedFieldAccess(instruction,
5033 instruction->GetFieldType(),
5034 instruction->GetFieldIndex(),
5035 instruction->GetDexPc(),
5036 calling_convention);
5037 }
5038
5039 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldSet(
5040 HUnresolvedStaticFieldSet* instruction) {
5041 FieldAccessCallingConventionX86_64 calling_convention;
5042 codegen_->CreateUnresolvedFieldLocationSummary(
5043 instruction, instruction->GetFieldType(), calling_convention);
5044 }
5045
5046 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldSet(
5047 HUnresolvedStaticFieldSet* instruction) {
5048 FieldAccessCallingConventionX86_64 calling_convention;
5049 codegen_->GenerateUnresolvedFieldAccess(instruction,
5050 instruction->GetFieldType(),
5051 instruction->GetFieldIndex(),
5052 instruction->GetDexPc(),
5053 calling_convention);
5054 }
5055
5056 void LocationsBuilderX86_64::VisitNullCheck(HNullCheck* instruction) {
5057 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
5058 Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks()
5059 ? Location::RequiresRegister()
5060 : Location::Any();
5061 locations->SetInAt(0, loc);
5062 }
5063
5064 void CodeGeneratorX86_64::GenerateImplicitNullCheck(HNullCheck* instruction) {
5065 if (CanMoveNullCheckToUser(instruction)) {
5066 return;
5067 }
5068 LocationSummary* locations = instruction->GetLocations();
5069 Location obj = locations->InAt(0);
5070
5071 __ testl(CpuRegister(RAX), Address(obj.AsRegister<CpuRegister>(), 0));
5072 RecordPcInfo(instruction, instruction->GetDexPc());
5073 }
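// Illustrative note: the testl above only reads the 32-bit word at offset 0 of the
// object and sets flags; it writes neither RAX nor memory. If `obj` is null the load
// faults, and the runtime's fault handler uses the PC recorded by RecordPcInfo to
// raise a NullPointerException. This is also why the locations builder above only
// requires a register input when implicit null checks are enabled.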
5074
5075 void CodeGeneratorX86_64::GenerateExplicitNullCheck(HNullCheck* instruction) {
5076 SlowPathCode* slow_path = new (GetScopedAllocator()) NullCheckSlowPathX86_64(instruction);
5077 AddSlowPath(slow_path);
5078
5079 LocationSummary* locations = instruction->GetLocations();
5080 Location obj = locations->InAt(0);
5081
5082 if (obj.IsRegister()) {
5083 __ testl(obj.AsRegister<CpuRegister>(), obj.AsRegister<CpuRegister>());
5084 } else if (obj.IsStackSlot()) {
5085 __ cmpl(Address(CpuRegister(RSP), obj.GetStackIndex()), Immediate(0));
5086 } else {
5087 DCHECK(obj.IsConstant()) << obj;
5088 DCHECK(obj.GetConstant()->IsNullConstant());
5089 __ jmp(slow_path->GetEntryLabel());
5090 return;
5091 }
5092 __ j(kEqual, slow_path->GetEntryLabel());
5093 }
5094
5095 void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) {
5096 codegen_->GenerateNullCheck(instruction);
5097 }
5098
5099 void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) {
5100 bool object_array_get_with_read_barrier =
5101 kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
5102 LocationSummary* locations =
5103 new (GetGraph()->GetAllocator()) LocationSummary(instruction,
5104 object_array_get_with_read_barrier
5105 ? LocationSummary::kCallOnSlowPath
5106 : LocationSummary::kNoCall);
5107 if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
5108 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
5109 }
5110 locations->SetInAt(0, Location::RequiresRegister());
5111 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5112 if (DataType::IsFloatingPointType(instruction->GetType())) {
5113 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5114 } else {
5115 // The output overlaps for an object array get when read barriers
5116 // are enabled: we do not want the move to overwrite the array's
5117 // location, as we need it to emit the read barrier.
5118 locations->SetOut(
5119 Location::RequiresRegister(),
5120 object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
5121 }
5122 }
5123
5124 void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
5125 LocationSummary* locations = instruction->GetLocations();
5126 Location obj_loc = locations->InAt(0);
5127 CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
5128 Location index = locations->InAt(1);
5129 Location out_loc = locations->Out();
5130 uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
5131
5132 DataType::Type type = instruction->GetType();
5133 switch (type) {
5134 case DataType::Type::kBool:
5135 case DataType::Type::kUint8: {
5136 CpuRegister out = out_loc.AsRegister<CpuRegister>();
5137 __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
5138 break;
5139 }
5140
5141 case DataType::Type::kInt8: {
5142 CpuRegister out = out_loc.AsRegister<CpuRegister>();
5143 __ movsxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
5144 break;
5145 }
5146
5147 case DataType::Type::kUint16: {
5148 CpuRegister out = out_loc.AsRegister<CpuRegister>();
5149 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
5150 // Branch cases into compressed and uncompressed for each index's type.
5151 uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
5152 NearLabel done, not_compressed;
5153 __ testb(Address(obj, count_offset), Immediate(1));
5154 codegen_->MaybeRecordImplicitNullCheck(instruction);
5155 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
5156 "Expecting 0=compressed, 1=uncompressed");
5157 __ j(kNotZero, &not_compressed);
5158 __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
5159 __ jmp(&done);
5160 __ Bind(&not_compressed);
5161 __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
5162 __ Bind(&done);
5163 } else {
5164 __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
5165 }
5166 break;
5167 }
5168
5169 case DataType::Type::kInt16: {
5170 CpuRegister out = out_loc.AsRegister<CpuRegister>();
5171 __ movsxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
5172 break;
5173 }
5174
5175 case DataType::Type::kInt32: {
5176 CpuRegister out = out_loc.AsRegister<CpuRegister>();
5177 __ movl(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
5178 break;
5179 }
5180
5181 case DataType::Type::kReference: {
5182 static_assert(
5183 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
5184 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
5185 // /* HeapReference<Object> */ out =
5186 // *(obj + data_offset + index * sizeof(HeapReference<Object>))
5187 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
5188 // Note that a potential implicit null check is handled in this
5189 // CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call.
5190 codegen_->GenerateArrayLoadWithBakerReadBarrier(
5191 instruction, out_loc, obj, data_offset, index, /* needs_null_check= */ true);
5192 } else {
5193 CpuRegister out = out_loc.AsRegister<CpuRegister>();
5194 __ movl(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
5195 codegen_->MaybeRecordImplicitNullCheck(instruction);
5196 // If read barriers are enabled, emit read barriers other than
5197 // Baker's using a slow path (and also unpoison the loaded
5198 // reference, if heap poisoning is enabled).
5199 if (index.IsConstant()) {
5200 uint32_t offset =
5201 (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
5202 codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
5203 } else {
5204 codegen_->MaybeGenerateReadBarrierSlow(
5205 instruction, out_loc, out_loc, obj_loc, data_offset, index);
5206 }
5207 }
5208 break;
5209 }
5210
5211 case DataType::Type::kInt64: {
5212 CpuRegister out = out_loc.AsRegister<CpuRegister>();
5213 __ movq(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_8, data_offset));
5214 break;
5215 }
5216
5217 case DataType::Type::kFloat32: {
5218 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
5219 __ movss(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
5220 break;
5221 }
5222
5223 case DataType::Type::kFloat64: {
5224 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
5225 __ movsd(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_8, data_offset));
5226 break;
5227 }
5228
5229 case DataType::Type::kUint32:
5230 case DataType::Type::kUint64:
5231 case DataType::Type::kVoid:
5232 LOG(FATAL) << "Unreachable type " << type;
5233 UNREACHABLE();
5234 }
5235
5236 if (type == DataType::Type::kReference) {
5237 // Potential implicit null checks, in the case of reference
5238 // arrays, are handled in the previous switch statement.
5239 } else {
5240 codegen_->MaybeRecordImplicitNullCheck(instruction);
5241 }
5242 }
5243
5244 void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) {
5245 DataType::Type value_type = instruction->GetComponentType();
5246
5247 bool needs_write_barrier =
5248 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
5249 bool needs_type_check = instruction->NeedsTypeCheck();
5250
5251 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5252 instruction,
5253 needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
5254
5255 locations->SetInAt(0, Location::RequiresRegister());
5256 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5257 if (DataType::IsFloatingPointType(value_type)) {
5258 locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
5259 } else {
5260 locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
5261 }
5262
5263 if (needs_write_barrier) {
5264 // Temporary registers for the write barrier.
5265 locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too.
5266 locations->AddTemp(Location::RequiresRegister());
5267 }
5268 }
5269
5270 void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
5271 LocationSummary* locations = instruction->GetLocations();
5272 Location array_loc = locations->InAt(0);
5273 CpuRegister array = array_loc.AsRegister<CpuRegister>();
5274 Location index = locations->InAt(1);
5275 Location value = locations->InAt(2);
5276 DataType::Type value_type = instruction->GetComponentType();
5277 bool needs_type_check = instruction->NeedsTypeCheck();
5278 bool needs_write_barrier =
5279 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
5280
5281 switch (value_type) {
5282 case DataType::Type::kBool:
5283 case DataType::Type::kUint8:
5284 case DataType::Type::kInt8: {
5285 uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
5286 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_1, offset);
5287 if (value.IsRegister()) {
5288 __ movb(address, value.AsRegister<CpuRegister>());
5289 } else {
5290 __ movb(address, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
5291 }
5292 codegen_->MaybeRecordImplicitNullCheck(instruction);
5293 break;
5294 }
5295
5296 case DataType::Type::kUint16:
5297 case DataType::Type::kInt16: {
5298 uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
5299 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_2, offset);
5300 if (value.IsRegister()) {
5301 __ movw(address, value.AsRegister<CpuRegister>());
5302 } else {
5303 DCHECK(value.IsConstant()) << value;
5304 __ movw(address, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
5305 }
5306 codegen_->MaybeRecordImplicitNullCheck(instruction);
5307 break;
5308 }
5309
5310 case DataType::Type::kReference: {
5311 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
5312 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
5313
5314 if (!value.IsRegister()) {
5315 // Just setting null.
5316 DCHECK(instruction->InputAt(2)->IsNullConstant());
5317 DCHECK(value.IsConstant()) << value;
5318 __ movl(address, Immediate(0));
5319 codegen_->MaybeRecordImplicitNullCheck(instruction);
5320 DCHECK(!needs_write_barrier);
5321 DCHECK(!needs_type_check);
5322 break;
5323 }
5324
5325 DCHECK(needs_write_barrier);
5326 CpuRegister register_value = value.AsRegister<CpuRegister>();
5327 Location temp_loc = locations->GetTemp(0);
5328 CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
5329
5330 bool can_value_be_null = instruction->GetValueCanBeNull();
5331 NearLabel do_store;
5332 if (can_value_be_null) {
5333 __ testl(register_value, register_value);
5334 __ j(kEqual, &do_store);
5335 }
5336
5337 SlowPathCode* slow_path = nullptr;
5338 if (needs_type_check) {
5339 slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathX86_64(instruction);
5340 codegen_->AddSlowPath(slow_path);
5341
5342 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
5343 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
5344 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
5345
5346 // Note that when Baker read barriers are enabled, the type
5347 // checks are performed without read barriers. This is fine,
5348 // even in the case where a class object is in the from-space
5349 // after the flip, as a comparison involving such a type would
5350 // not produce a false positive; it may of course produce a
5351 // false negative, in which case we would take the ArraySet
5352 // slow path.
5353
5354 // /* HeapReference<Class> */ temp = array->klass_
5355 __ movl(temp, Address(array, class_offset));
5356 codegen_->MaybeRecordImplicitNullCheck(instruction);
5357 __ MaybeUnpoisonHeapReference(temp);
5358
5359 // /* HeapReference<Class> */ temp = temp->component_type_
5360 __ movl(temp, Address(temp, component_offset));
5361 // If heap poisoning is enabled, no need to unpoison `temp`
5362 // nor the object reference in `register_value->klass`, as
5363 // we are comparing two poisoned references.
5364 __ cmpl(temp, Address(register_value, class_offset));
5365
5366 if (instruction->StaticTypeOfArrayIsObjectArray()) {
5367 NearLabel do_put;
5368 __ j(kEqual, &do_put);
5369 // If heap poisoning is enabled, the `temp` reference has
5370 // not been unpoisoned yet; unpoison it now.
5371 __ MaybeUnpoisonHeapReference(temp);
5372
5373 // If heap poisoning is enabled, no need to unpoison the
5374 // heap reference loaded below, as it is only used for a
5375 // comparison with null.
5376 __ cmpl(Address(temp, super_offset), Immediate(0));
5377 __ j(kNotEqual, slow_path->GetEntryLabel());
5378 __ Bind(&do_put);
5379 } else {
5380 __ j(kNotEqual, slow_path->GetEntryLabel());
5381 }
5382 }
5383
5384 CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
5385 codegen_->MarkGCCard(
5386 temp, card, array, value.AsRegister<CpuRegister>(), /* value_can_be_null= */ false);
5387
5388 if (can_value_be_null) {
5389 DCHECK(do_store.IsLinked());
5390 __ Bind(&do_store);
5391 }
5392
5393 Location source = value;
5394 if (kPoisonHeapReferences) {
5395 __ movl(temp, register_value);
5396 __ PoisonHeapReference(temp);
5397 source = temp_loc;
5398 }
5399
5400 __ movl(address, source.AsRegister<CpuRegister>());
5401
5402 if (can_value_be_null || !needs_type_check) {
5403 codegen_->MaybeRecordImplicitNullCheck(instruction);
5404 }
5405
5406 if (slow_path != nullptr) {
5407 __ Bind(slow_path->GetExitLabel());
5408 }
5409
5410 break;
5411 }
5412
5413 case DataType::Type::kInt32: {
5414 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
5415 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
5416 if (value.IsRegister()) {
5417 __ movl(address, value.AsRegister<CpuRegister>());
5418 } else {
5419 DCHECK(value.IsConstant()) << value;
5420 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
5421 __ movl(address, Immediate(v));
5422 }
5423 codegen_->MaybeRecordImplicitNullCheck(instruction);
5424 break;
5425 }
5426
5427 case DataType::Type::kInt64: {
5428 uint32_t offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
5429 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset);
5430 if (value.IsRegister()) {
5431 __ movq(address, value.AsRegister<CpuRegister>());
5432 codegen_->MaybeRecordImplicitNullCheck(instruction);
5433 } else {
5434 int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
5435 Address address_high =
5436 CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t));
5437 codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
5438 }
5439 break;
5440 }
5441
5442 case DataType::Type::kFloat32: {
5443 uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
5444 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
5445 if (value.IsFpuRegister()) {
5446 __ movss(address, value.AsFpuRegister<XmmRegister>());
5447 } else {
5448 DCHECK(value.IsConstant());
5449 int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
5450 __ movl(address, Immediate(v));
5451 }
5452 codegen_->MaybeRecordImplicitNullCheck(instruction);
5453 break;
5454 }
5455
5456 case DataType::Type::kFloat64: {
5457 uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
5458 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset);
5459 if (value.IsFpuRegister()) {
5460 __ movsd(address, value.AsFpuRegister<XmmRegister>());
5461 codegen_->MaybeRecordImplicitNullCheck(instruction);
5462 } else {
5463 int64_t v =
5464 bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
5465 Address address_high =
5466 CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t));
5467 codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
5468 }
5469 break;
5470 }
5471
5472 case DataType::Type::kUint32:
5473 case DataType::Type::kUint64:
5474 case DataType::Type::kVoid:
5475 LOG(FATAL) << "Unreachable type " << instruction->GetType();
5476 UNREACHABLE();
5477 }
5478 }
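// Illustrative summary of the reference ArraySet path above: storing null skips both
// the type check and the write barrier. Otherwise value->klass_ is compared against
// array->klass_->component_type_; if they differ and the static type is Object[], the
// store is still accepted when the runtime component type is exactly java.lang.Object
// (its super_ is null). Every other mismatch branches to ArraySetSlowPathX86_64, which
// may still succeed or throw ArrayStoreException.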
5479
5480 void LocationsBuilderX86_64::VisitArrayLength(HArrayLength* instruction) {
5481 LocationSummary* locations =
5482 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5483 locations->SetInAt(0, Location::RequiresRegister());
5484 if (!instruction->IsEmittedAtUseSite()) {
5485 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5486 }
5487 }
5488
5489 void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction) {
5490 if (instruction->IsEmittedAtUseSite()) {
5491 return;
5492 }
5493
5494 LocationSummary* locations = instruction->GetLocations();
5495 uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
5496 CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
5497 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
5498 __ movl(out, Address(obj, offset));
5499 codegen_->MaybeRecordImplicitNullCheck(instruction);
5500 // Mask out most significant bit in case the array is String's array of char.
5501 if (mirror::kUseStringCompression && instruction->IsStringLength()) {
5502 __ shrl(out, Immediate(1));
5503 }
5504 }
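// Illustrative layout behind the compressed-string handling above and in VisitArrayGet:
// String::count_ holds (length << 1) | flag, where flag bit 0 is 0 for a compressed
// (8-bit chars) string and 1 for an uncompressed (16-bit chars) string. Hence the
// testb against Immediate(1) to select the element size, and the shrl by 1 here to
// recover the character count for HArrayLength on strings.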
5505
5506 void LocationsBuilderX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
5507 RegisterSet caller_saves = RegisterSet::Empty();
5508 InvokeRuntimeCallingConvention calling_convention;
5509 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
5510 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
5511 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
5512 locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
5513 HInstruction* length = instruction->InputAt(1);
5514 if (!length->IsEmittedAtUseSite()) {
5515 locations->SetInAt(1, Location::RegisterOrConstant(length));
5516 }
5517 }
5518
5519 void InstructionCodeGeneratorX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
5520 LocationSummary* locations = instruction->GetLocations();
5521 Location index_loc = locations->InAt(0);
5522 Location length_loc = locations->InAt(1);
5523 SlowPathCode* slow_path =
5524 new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathX86_64(instruction);
5525
5526 if (length_loc.IsConstant()) {
5527 int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
5528 if (index_loc.IsConstant()) {
5529 // BCE will remove the bounds check if we are guaranteed to pass.
5530 int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
5531 if (index < 0 || index >= length) {
5532 codegen_->AddSlowPath(slow_path);
5533 __ jmp(slow_path->GetEntryLabel());
5534 } else {
5535 // Some optimization after BCE may have generated this, and we should not
5536 // generate a bounds check if it is a valid range.
5537 }
5538 return;
5539 }
5540
5541 // We have to reverse the jump condition because the length is the constant.
5542 CpuRegister index_reg = index_loc.AsRegister<CpuRegister>();
5543 __ cmpl(index_reg, Immediate(length));
5544 codegen_->AddSlowPath(slow_path);
5545 __ j(kAboveEqual, slow_path->GetEntryLabel());
5546 } else {
5547 HInstruction* array_length = instruction->InputAt(1);
5548 if (array_length->IsEmittedAtUseSite()) {
5549 // Address the length field in the array.
5550 DCHECK(array_length->IsArrayLength());
5551 uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
5552 Location array_loc = array_length->GetLocations()->InAt(0);
5553 Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
5554 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
5555 // TODO: if index_loc.IsConstant(), compare twice the index (to compensate for
5556 // the string compression flag) with the in-memory length and avoid the temporary.
5557 CpuRegister length_reg = CpuRegister(TMP);
5558 __ movl(length_reg, array_len);
5559 codegen_->MaybeRecordImplicitNullCheck(array_length);
5560 __ shrl(length_reg, Immediate(1));
5561 codegen_->GenerateIntCompare(length_reg, index_loc);
5562 } else {
5563 // Checking the bound for the general case:
5564 // an array of char, or a String's char array when the compression feature is off.
5565 if (index_loc.IsConstant()) {
5566 int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
5567 __ cmpl(array_len, Immediate(value));
5568 } else {
5569 __ cmpl(array_len, index_loc.AsRegister<CpuRegister>());
5570 }
5571 codegen_->MaybeRecordImplicitNullCheck(array_length);
5572 }
5573 } else {
5574 codegen_->GenerateIntCompare(length_loc, index_loc);
5575 }
5576 codegen_->AddSlowPath(slow_path);
5577 __ j(kBelowEqual, slow_path->GetEntryLabel());
5578 }
5579 }
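// Illustrative note on the comparisons above: the index is a signed int32 but the
// branch uses an unsigned condition. cmpl(index, length) followed by j(kAboveEqual)
// rejects both index >= length and index < 0 in one test, because a negative index
// compares as a large unsigned value. With the operands swapped (cmpl(length, index)),
// the condition becomes kBelowEqual for the same effect.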
5580
5581 void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp,
5582 CpuRegister card,
5583 CpuRegister object,
5584 CpuRegister value,
5585 bool value_can_be_null) {
5586 NearLabel is_null;
5587 if (value_can_be_null) {
5588 __ testl(value, value);
5589 __ j(kEqual, &is_null);
5590 }
5591 // Load the address of the card table into `card`.
5592 __ gs()->movq(card, Address::Absolute(Thread::CardTableOffset<kX86_64PointerSize>().Int32Value(),
5593 /* no_rip= */ true));
5594 // Calculate the offset (in the card table) of the card corresponding to
5595 // `object`.
5596 __ movq(temp, object);
5597 __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift));
5598 // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
5599 // `object`'s card.
5600 //
5601 // Register `card` contains the address of the card table. Note that the card
5602 // table's base is biased during its creation so that it always starts at an
5603 // address whose least-significant byte is equal to `kCardDirty` (see
5604 // art::gc::accounting::CardTable::Create). Therefore the MOVB instruction
5605 // below writes the `kCardDirty` (byte) value into the `object`'s card
5606 // (located at `card + object >> kCardShift`).
5607 //
5608 // This dual use of the value in register `card` (1. to calculate the location
5609 // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
5610 // (no need to explicitly load `kCardDirty` as an immediate value).
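// In other words, for an object at address A the byte written is at
// `card + (A >> kCardShift)`, and the value stored is the low byte of `card` itself,
// which the biased table base guarantees to be kCardDirty.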
5611 __ movb(Address(temp, card, TIMES_1, 0), card);
5612 if (value_can_be_null) {
5613 __ Bind(&is_null);
5614 }
5615 }
5616
5617 void LocationsBuilderX86_64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
5618 LOG(FATAL) << "Unimplemented";
5619 }
5620
5621 void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instruction) {
5622 if (instruction->GetNext()->IsSuspendCheck() &&
5623 instruction->GetBlock()->GetLoopInformation() != nullptr) {
5624 HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
5625 // The back edge will generate the suspend check.
5626 codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
5627 }
5628
5629 codegen_->GetMoveResolver()->EmitNativeCode(instruction);
5630 }
5631
5632 void LocationsBuilderX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
5633 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5634 instruction, LocationSummary::kCallOnSlowPath);
5635 // In the suspend check slow path there are usually no caller-save registers at all.
5636 // If SIMD instructions are present, however, we force spilling all live SIMD
5637 // registers in full width (since the runtime only saves/restores the lower part).
5638 locations->SetCustomSlowPathCallerSaves(
5639 GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
5640 }
5641
5642 void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
5643 HBasicBlock* block = instruction->GetBlock();
5644 if (block->GetLoopInformation() != nullptr) {
5645 DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
5646 // The back edge will generate the suspend check.
5647 return;
5648 }
5649 if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
5650 // The goto will generate the suspend check.
5651 return;
5652 }
5653 GenerateSuspendCheck(instruction, nullptr);
5654 }
5655
5656 void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruction,
5657 HBasicBlock* successor) {
5658 SuspendCheckSlowPathX86_64* slow_path =
5659 down_cast<SuspendCheckSlowPathX86_64*>(instruction->GetSlowPath());
5660 if (slow_path == nullptr) {
5661 slow_path =
5662 new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathX86_64(instruction, successor);
5663 instruction->SetSlowPath(slow_path);
5664 codegen_->AddSlowPath(slow_path);
5665 if (successor != nullptr) {
5666 DCHECK(successor->IsLoopHeader());
5667 }
5668 } else {
5669 DCHECK_EQ(slow_path->GetSuccessor(), successor);
5670 }
5671
5672 __ gs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64PointerSize>().Int32Value(),
5673 /* no_rip= */ true),
5674 Immediate(0));
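// The flags portion of the thread's state-and-flags word is assumed to be non-zero only
// when the runtime has a pending suspend or checkpoint request, so the fast path is this
// single gs-relative compare against zero.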
5675 if (successor == nullptr) {
5676 __ j(kNotEqual, slow_path->GetEntryLabel());
5677 __ Bind(slow_path->GetReturnLabel());
5678 } else {
5679 __ j(kEqual, codegen_->GetLabelOf(successor));
5680 __ jmp(slow_path->GetEntryLabel());
5681 }
5682 }
5683
5684 X86_64Assembler* ParallelMoveResolverX86_64::GetAssembler() const {
5685 return codegen_->GetAssembler();
5686 }
5687
5688 void ParallelMoveResolverX86_64::EmitMove(size_t index) {
5689 MoveOperands* move = moves_[index];
5690 Location source = move->GetSource();
5691 Location destination = move->GetDestination();
5692
5693 if (source.IsRegister()) {
5694 if (destination.IsRegister()) {
5695 __ movq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
5696 } else if (destination.IsStackSlot()) {
5697 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
5698 source.AsRegister<CpuRegister>());
5699 } else {
5700 DCHECK(destination.IsDoubleStackSlot());
5701 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
5702 source.AsRegister<CpuRegister>());
5703 }
5704 } else if (source.IsStackSlot()) {
5705 if (destination.IsRegister()) {
5706 __ movl(destination.AsRegister<CpuRegister>(),
5707 Address(CpuRegister(RSP), source.GetStackIndex()));
5708 } else if (destination.IsFpuRegister()) {
5709 __ movss(destination.AsFpuRegister<XmmRegister>(),
5710 Address(CpuRegister(RSP), source.GetStackIndex()));
5711 } else {
5712 DCHECK(destination.IsStackSlot());
5713 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
5714 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
5715 }
5716 } else if (source.IsDoubleStackSlot()) {
5717 if (destination.IsRegister()) {
5718 __ movq(destination.AsRegister<CpuRegister>(),
5719 Address(CpuRegister(RSP), source.GetStackIndex()));
5720 } else if (destination.IsFpuRegister()) {
5721 __ movsd(destination.AsFpuRegister<XmmRegister>(),
5722 Address(CpuRegister(RSP), source.GetStackIndex()));
5723 } else {
5724 DCHECK(destination.IsDoubleStackSlot()) << destination;
5725 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
5726 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
5727 }
5728 } else if (source.IsSIMDStackSlot()) {
5729 if (destination.IsFpuRegister()) {
5730 __ movups(destination.AsFpuRegister<XmmRegister>(),
5731 Address(CpuRegister(RSP), source.GetStackIndex()));
5732 } else {
5733 DCHECK(destination.IsSIMDStackSlot());
5734 size_t high = kX86_64WordSize;
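// The 128-bit slot is copied as two 64-bit halves through the core TMP register; this
// assumes no XMM scratch register is reserved for the move resolver on this path.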
5735 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
5736 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
5737 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex() + high));
5738 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex() + high), CpuRegister(TMP));
5739 }
5740 } else if (source.IsConstant()) {
5741 HConstant* constant = source.GetConstant();
5742 if (constant->IsIntConstant() || constant->IsNullConstant()) {
5743 int32_t value = CodeGenerator::GetInt32ValueOf(constant);
5744 if (destination.IsRegister()) {
5745 if (value == 0) {
5746 __ xorl(destination.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
5747 } else {
5748 __ movl(destination.AsRegister<CpuRegister>(), Immediate(value));
5749 }
5750 } else {
5751 DCHECK(destination.IsStackSlot()) << destination;
5752 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
5753 }
5754 } else if (constant->IsLongConstant()) {
5755 int64_t value = constant->AsLongConstant()->GetValue();
5756 if (destination.IsRegister()) {
5757 codegen_->Load64BitValue(destination.AsRegister<CpuRegister>(), value);
5758 } else {
5759 DCHECK(destination.IsDoubleStackSlot()) << destination;
5760 codegen_->Store64BitValueToStack(destination, value);
5761 }
5762 } else if (constant->IsFloatConstant()) {
5763 float fp_value = constant->AsFloatConstant()->GetValue();
5764 if (destination.IsFpuRegister()) {
5765 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
5766 codegen_->Load32BitValue(dest, fp_value);
5767 } else {
5768 DCHECK(destination.IsStackSlot()) << destination;
5769 Immediate imm(bit_cast<int32_t, float>(fp_value));
5770 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
5771 }
5772 } else {
5773 DCHECK(constant->IsDoubleConstant()) << constant->DebugName();
5774 double fp_value = constant->AsDoubleConstant()->GetValue();
5775 int64_t value = bit_cast<int64_t, double>(fp_value);
5776 if (destination.IsFpuRegister()) {
5777 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
5778 codegen_->Load64BitValue(dest, fp_value);
5779 } else {
5780 DCHECK(destination.IsDoubleStackSlot()) << destination;
5781 codegen_->Store64BitValueToStack(destination, value);
5782 }
5783 }
5784 } else if (source.IsFpuRegister()) {
5785 if (destination.IsFpuRegister()) {
5786 __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
5787 } else if (destination.IsStackSlot()) {
5788 __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
5789 source.AsFpuRegister<XmmRegister>());
5790 } else if (destination.IsDoubleStackSlot()) {
5791 __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
5792 source.AsFpuRegister<XmmRegister>());
5793 } else {
5794 DCHECK(destination.IsSIMDStackSlot());
5795 __ movups(Address(CpuRegister(RSP), destination.GetStackIndex()),
5796 source.AsFpuRegister<XmmRegister>());
5797 }
5798 }
5799 }
5800
5801 void ParallelMoveResolverX86_64::Exchange32(CpuRegister reg, int mem) {
5802 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5803 __ movl(Address(CpuRegister(RSP), mem), reg);
5804 __ movl(reg, CpuRegister(TMP));
5805 }
5806
5807 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg1, CpuRegister reg2) {
5808 __ movq(CpuRegister(TMP), reg1);
5809 __ movq(reg1, reg2);
5810 __ movq(reg2, CpuRegister(TMP));
5811 }
5812
5813 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) {
5814 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5815 __ movq(Address(CpuRegister(RSP), mem), reg);
5816 __ movq(reg, CpuRegister(TMP));
5817 }
5818
5819 void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) {
5820 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5821 __ movss(Address(CpuRegister(RSP), mem), reg);
5822 __ movd(reg, CpuRegister(TMP));
5823 }
5824
5825 void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) {
5826 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5827 __ movsd(Address(CpuRegister(RSP), mem), reg);
5828 __ movd(reg, CpuRegister(TMP));
5829 }
5830
5831 void ParallelMoveResolverX86_64::Exchange128(XmmRegister reg, int mem) {
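// Outline of the exchange below: reserve 16 bytes of stack as a scratch slot, park the XMM
// register there, swap that slot with the in-memory operand via ExchangeMemory64 (the
// memory offset is biased by `extra_slot` because RSP has just moved down), then reload
// the XMM register and release the scratch slot.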
5832 size_t extra_slot = 2 * kX86_64WordSize;
5833 __ subq(CpuRegister(RSP), Immediate(extra_slot));
5834 __ movups(Address(CpuRegister(RSP), 0), XmmRegister(reg));
5835 ExchangeMemory64(0, mem + extra_slot, 2);
5836 __ movups(XmmRegister(reg), Address(CpuRegister(RSP), 0));
5837 __ addq(CpuRegister(RSP), Immediate(extra_slot));
5838 }
5839
5840 void ParallelMoveResolverX86_64::ExchangeMemory32(int mem1, int mem2) {
5841 ScratchRegisterScope ensure_scratch(
5842 this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
5843
5844 int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
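// If the scratch register had to be spilled, SpillScratch pushed it and RSP moved down by
// one word, so the RSP-relative offsets below are biased by `stack_offset` to compensate.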
5845 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
5846 __ movl(CpuRegister(ensure_scratch.GetRegister()),
5847 Address(CpuRegister(RSP), mem2 + stack_offset));
5848 __ movl(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
5849 __ movl(Address(CpuRegister(RSP), mem1 + stack_offset),
5850 CpuRegister(ensure_scratch.GetRegister()));
5851 }
5852
5853 void ParallelMoveResolverX86_64::ExchangeMemory64(int mem1, int mem2, int num_of_qwords) {
5854 ScratchRegisterScope ensure_scratch(
5855 this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
5856
5857 int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
5858
5859 // Now that temp registers are available (possibly spilled), exchange blocks of memory.
5860 for (int i = 0; i < num_of_qwords; i++) {
5861 __ movq(CpuRegister(TMP),
5862 Address(CpuRegister(RSP), mem1 + stack_offset));
5863 __ movq(CpuRegister(ensure_scratch.GetRegister()),
5864 Address(CpuRegister(RSP), mem2 + stack_offset));
5865 __ movq(Address(CpuRegister(RSP), mem2 + stack_offset),
5866 CpuRegister(TMP));
5867 __ movq(Address(CpuRegister(RSP), mem1 + stack_offset),
5868 CpuRegister(ensure_scratch.GetRegister()));
5869 stack_offset += kX86_64WordSize;
5870 }
5871 }
5872
5873 void ParallelMoveResolverX86_64::EmitSwap(size_t index) {
5874 MoveOperands* move = moves_[index];
5875 Location source = move->GetSource();
5876 Location destination = move->GetDestination();
5877
5878 if (source.IsRegister() && destination.IsRegister()) {
5879 Exchange64(source.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
5880 } else if (source.IsRegister() && destination.IsStackSlot()) {
5881 Exchange32(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
5882 } else if (source.IsStackSlot() && destination.IsRegister()) {
5883 Exchange32(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
5884 } else if (source.IsStackSlot() && destination.IsStackSlot()) {
5885 ExchangeMemory32(destination.GetStackIndex(), source.GetStackIndex());
5886 } else if (source.IsRegister() && destination.IsDoubleStackSlot()) {
5887 Exchange64(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
5888 } else if (source.IsDoubleStackSlot() && destination.IsRegister()) {
5889 Exchange64(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
5890 } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
5891 ExchangeMemory64(destination.GetStackIndex(), source.GetStackIndex(), 1);
5892 } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
5893 __ movd(CpuRegister(TMP), source.AsFpuRegister<XmmRegister>());
5894 __ movaps(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>());
5895 __ movd(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP));
5896 } else if (source.IsFpuRegister() && destination.IsStackSlot()) {
5897 Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
5898 } else if (source.IsStackSlot() && destination.IsFpuRegister()) {
5899 Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
5900 } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) {
5901 Exchange64(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
5902 } else if (source.IsDoubleStackSlot() && destination.IsFpuRegister()) {
5903 Exchange64(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
5904 } else if (source.IsSIMDStackSlot() && destination.IsSIMDStackSlot()) {
5905 ExchangeMemory64(destination.GetStackIndex(), source.GetStackIndex(), 2);
5906 } else if (source.IsFpuRegister() && destination.IsSIMDStackSlot()) {
5907 Exchange128(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
5908 } else if (destination.IsFpuRegister() && source.IsSIMDStackSlot()) {
5909 Exchange128(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
5910 } else {
5911 LOG(FATAL) << "Unimplemented swap between " << source << " and " << destination;
5912 }
5913 }
5914
5915
5916 void ParallelMoveResolverX86_64::SpillScratch(int reg) {
5917 __ pushq(CpuRegister(reg));
5918 }
5919
5920
5921 void ParallelMoveResolverX86_64::RestoreScratch(int reg) {
5922 __ popq(CpuRegister(reg));
5923 }
5924
5925 void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck(
5926 SlowPathCode* slow_path, CpuRegister class_reg) {
5927 constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
5928 const size_t status_byte_offset =
5929 mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
5930 constexpr uint32_t shifted_visibly_initialized_value =
5931 enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte);
5932
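// A single unsigned byte compare suffices here, assuming kVisiblyInitialized is the highest
// ClassStatus value and the status bits occupy the upper bits of the addressed byte: any
// class that is not yet visibly initialized yields a smaller byte value and branches below
// to the slow path.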
5933 __ cmpb(Address(class_reg, status_byte_offset), Immediate(shifted_visibly_initialized_value));
5934 __ j(kBelow, slow_path->GetEntryLabel());
5935 __ Bind(slow_path->GetExitLabel());
5936 }
5937
5938 void InstructionCodeGeneratorX86_64::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
5939 CpuRegister temp) {
5940 uint32_t path_to_root = check->GetBitstringPathToRoot();
5941 uint32_t mask = check->GetBitstringMask();
5942 DCHECK(IsPowerOfTwo(mask + 1));
5943 size_t mask_bits = WhichPowerOf2(mask + 1);
5944
5945 if (mask_bits == 16u) {
5946 // Compare the bitstring in memory.
5947 __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root));
5948 } else {
5949 // /* uint32_t */ temp = temp->status_
5950 __ movl(temp, Address(temp, mirror::Class::StatusOffset()));
5951 // Compare the bitstring bits using SUB.
5952 __ subl(temp, Immediate(path_to_root));
5953 // Shift out bits that do not contribute to the comparison.
5954 __ shll(temp, Immediate(32u - mask_bits));
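// After the subtraction, the low `mask_bits` bits of `temp` are zero exactly when the
// object's bitstring matches `path_to_root`; the shift discards the unrelated high bits,
// so callers can branch directly on the resulting zero flag (kEqual / kNotEqual).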
5955 }
5956 }
5957
5958 HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind(
5959 HLoadClass::LoadKind desired_class_load_kind) {
5960 switch (desired_class_load_kind) {
5961 case HLoadClass::LoadKind::kInvalid:
5962 LOG(FATAL) << "UNREACHABLE";
5963 UNREACHABLE();
5964 case HLoadClass::LoadKind::kReferrersClass:
5965 break;
5966 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
5967 case HLoadClass::LoadKind::kBootImageRelRo:
5968 case HLoadClass::LoadKind::kBssEntry:
5969 DCHECK(!Runtime::Current()->UseJitCompilation());
5970 break;
5971 case HLoadClass::LoadKind::kJitBootImageAddress:
5972 case HLoadClass::LoadKind::kJitTableAddress:
5973 DCHECK(Runtime::Current()->UseJitCompilation());
5974 break;
5975 case HLoadClass::LoadKind::kRuntimeCall:
5976 break;
5977 }
5978 return desired_class_load_kind;
5979 }
5980
5981 void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
5982 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
5983 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
5984 // Custom calling convention: RAX serves as both input and output.
5985 CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
5986 cls,
5987 Location::RegisterLocation(RAX),
5988 Location::RegisterLocation(RAX));
5989 return;
5990 }
5991 DCHECK(!cls->NeedsAccessCheck());
5992
5993 const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
5994 LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
5995 ? LocationSummary::kCallOnSlowPath
5996 : LocationSummary::kNoCall;
5997 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind);
5998 if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
5999 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
6000 }
6001
6002 if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
6003 locations->SetInAt(0, Location::RequiresRegister());
6004 }
6005 locations->SetOut(Location::RequiresRegister());
6006 if (load_kind == HLoadClass::LoadKind::kBssEntry) {
6007 if (!kUseReadBarrier || kUseBakerReadBarrier) {
6008 // Rely on the type resolution and/or initialization to save everything.
6009 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6010 } else {
6011 // For non-Baker read barrier we have a temp-clobbering call.
6012 }
6013 }
6014 }
6015
6016 Label* CodeGeneratorX86_64::NewJitRootClassPatch(const DexFile& dex_file,
6017 dex::TypeIndex type_index,
6018 Handle<mirror::Class> handle) {
6019 ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
6020 // Add a patch entry and return the label.
6021 jit_class_patches_.emplace_back(&dex_file, type_index.index_);
6022 PatchInfo<Label>* info = &jit_class_patches_.back();
6023 return &info->label;
6024 }
6025
6026 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
6027 // move.
6028 void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
6029 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
6030 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
6031 codegen_->GenerateLoadClassRuntimeCall(cls);
6032 return;
6033 }
6034 DCHECK(!cls->NeedsAccessCheck());
6035
6036 LocationSummary* locations = cls->GetLocations();
6037 Location out_loc = locations->Out();
6038 CpuRegister out = out_loc.AsRegister<CpuRegister>();
6039
6040 const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
6041 ? kWithoutReadBarrier
6042 : kCompilerReadBarrierOption;
6043 bool generate_null_check = false;
6044 switch (load_kind) {
6045 case HLoadClass::LoadKind::kReferrersClass: {
6046 DCHECK(!cls->CanCallRuntime());
6047 DCHECK(!cls->MustGenerateClinitCheck());
6048 // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
6049 CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
6050 GenerateGcRootFieldLoad(
6051 cls,
6052 out_loc,
6053 Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()),
6054 /* fixup_label= */ nullptr,
6055 read_barrier_option);
6056 break;
6057 }
6058 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
6059 DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
6060 codegen_->GetCompilerOptions().IsBootImageExtension());
6061 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
6062 __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false));
6063 codegen_->RecordBootImageTypePatch(cls);
6064 break;
6065 case HLoadClass::LoadKind::kBootImageRelRo: {
6066 DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
6067 __ movl(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false));
6068 codegen_->RecordBootImageRelRoPatch(codegen_->GetBootImageOffset(cls));
6069 break;
6070 }
6071 case HLoadClass::LoadKind::kBssEntry: {
6072 Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
6073 /* no_rip= */ false);
6074 Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls);
6075 // /* GcRoot<mirror::Class> */ out = *address /* PC-relative */
6076 GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
6077 // No need for memory fence, thanks to the x86-64 memory model.
6078 generate_null_check = true;
6079 break;
6080 }
6081 case HLoadClass::LoadKind::kJitBootImageAddress: {
6082 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
6083 uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get());
6084 DCHECK_NE(address, 0u);
6085 __ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended.
6086 break;
6087 }
6088 case HLoadClass::LoadKind::kJitTableAddress: {
6089 Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
6090 /* no_rip= */ true);
6091 Label* fixup_label =
6092 codegen_->NewJitRootClassPatch(cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass());
6093 // /* GcRoot<mirror::Class> */ out = *address
6094 GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
6095 break;
6096 }
6097 default:
6098 LOG(FATAL) << "Unexpected load kind: " << cls->GetLoadKind();
6099 UNREACHABLE();
6100 }
6101
6102 if (generate_null_check || cls->MustGenerateClinitCheck()) {
6103 DCHECK(cls->CanCallRuntime());
6104 SlowPathCode* slow_path =
6105 new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64(cls, cls);
6106 codegen_->AddSlowPath(slow_path);
6107 if (generate_null_check) {
6108 __ testl(out, out);
6109 __ j(kEqual, slow_path->GetEntryLabel());
6110 }
6111 if (cls->MustGenerateClinitCheck()) {
6112 GenerateClassInitializationCheck(slow_path, out);
6113 } else {
6114 __ Bind(slow_path->GetExitLabel());
6115 }
6116 }
6117 }
6118
6119 void LocationsBuilderX86_64::VisitClinitCheck(HClinitCheck* check) {
6120 LocationSummary* locations =
6121 new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
6122 locations->SetInAt(0, Location::RequiresRegister());
6123 if (check->HasUses()) {
6124 locations->SetOut(Location::SameAsFirstInput());
6125 }
6126 // Rely on the type initialization to save everything we need.
6127 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6128 }
6129
6130 void LocationsBuilderX86_64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
6131 // Custom calling convention: RAX serves as both input and output.
6132 Location location = Location::RegisterLocation(RAX);
6133 CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location);
6134 }
6135
6136 void InstructionCodeGeneratorX86_64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
6137 codegen_->GenerateLoadMethodHandleRuntimeCall(load);
6138 }
6139
6140 void LocationsBuilderX86_64::VisitLoadMethodType(HLoadMethodType* load) {
6141 // Custom calling convention: RAX serves as both input and output.
6142 Location location = Location::RegisterLocation(RAX);
6143 CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location);
6144 }
6145
6146 void InstructionCodeGeneratorX86_64::VisitLoadMethodType(HLoadMethodType* load) {
6147 codegen_->GenerateLoadMethodTypeRuntimeCall(load);
6148 }
6149
6150 void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) {
6151 // We assume the class to not be null.
6152 SlowPathCode* slow_path =
6153 new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64(check->GetLoadClass(), check);
6154 codegen_->AddSlowPath(slow_path);
6155 GenerateClassInitializationCheck(slow_path,
6156 check->GetLocations()->InAt(0).AsRegister<CpuRegister>());
6157 }
6158
6159 HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind(
6160 HLoadString::LoadKind desired_string_load_kind) {
6161 switch (desired_string_load_kind) {
6162 case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
6163 case HLoadString::LoadKind::kBootImageRelRo:
6164 case HLoadString::LoadKind::kBssEntry:
6165 DCHECK(!Runtime::Current()->UseJitCompilation());
6166 break;
6167 case HLoadString::LoadKind::kJitBootImageAddress:
6168 case HLoadString::LoadKind::kJitTableAddress:
6169 DCHECK(Runtime::Current()->UseJitCompilation());
6170 break;
6171 case HLoadString::LoadKind::kRuntimeCall:
6172 break;
6173 }
6174 return desired_string_load_kind;
6175 }
6176
6177 void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) {
6178 LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
6179 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
6180 if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) {
6181 locations->SetOut(Location::RegisterLocation(RAX));
6182 } else {
6183 locations->SetOut(Location::RequiresRegister());
6184 if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
6185 if (!kUseReadBarrier || kUseBakerReadBarrier) {
6186 // Rely on the pResolveString to save everything.
6187 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6188 } else {
6189 // For non-Baker read barrier we have a temp-clobbering call.
6190 }
6191 }
6192 }
6193 }
6194
6195 Label* CodeGeneratorX86_64::NewJitRootStringPatch(const DexFile& dex_file,
6196 dex::StringIndex string_index,
6197 Handle<mirror::String> handle) {
6198 ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
6199 // Add a patch entry and return the label.
6200 jit_string_patches_.emplace_back(&dex_file, string_index.index_);
6201 PatchInfo<Label>* info = &jit_string_patches_.back();
6202 return &info->label;
6203 }
6204
6205 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
6206 // move.
6207 void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
6208 LocationSummary* locations = load->GetLocations();
6209 Location out_loc = locations->Out();
6210 CpuRegister out = out_loc.AsRegister<CpuRegister>();
6211
6212 switch (load->GetLoadKind()) {
6213 case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
6214 DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
6215 codegen_->GetCompilerOptions().IsBootImageExtension());
6216 __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false));
6217 codegen_->RecordBootImageStringPatch(load);
6218 return;
6219 }
6220 case HLoadString::LoadKind::kBootImageRelRo: {
6221 DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
6222 __ movl(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false));
6223 codegen_->RecordBootImageRelRoPatch(codegen_->GetBootImageOffset(load));
6224 return;
6225 }
6226 case HLoadString::LoadKind::kBssEntry: {
6227 Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
6228 /* no_rip= */ false);
6229 Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
6230 // /* GcRoot<mirror::String> */ out = *address /* PC-relative */
6231 GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
6232 // No need for memory fence, thanks to the x86-64 memory model.
6233 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86_64(load);
6234 codegen_->AddSlowPath(slow_path);
6235 __ testl(out, out);
6236 __ j(kEqual, slow_path->GetEntryLabel());
6237 __ Bind(slow_path->GetExitLabel());
6238 return;
6239 }
6240 case HLoadString::LoadKind::kJitBootImageAddress: {
6241 uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get());
6242 DCHECK_NE(address, 0u);
6243 __ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended.
6244 return;
6245 }
6246 case HLoadString::LoadKind::kJitTableAddress: {
6247 Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
6248 /* no_rip= */ true);
6249 Label* fixup_label = codegen_->NewJitRootStringPatch(
6250 load->GetDexFile(), load->GetStringIndex(), load->GetString());
6251 // /* GcRoot<mirror::String> */ out = *address
6252 GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
6253 return;
6254 }
6255 default:
6256 break;
6257 }
6258
6259 // TODO: Re-add the compiler code to do string dex cache lookup again.
6260 // Custom calling convention: RAX serves as both input and output.
6261 __ movl(CpuRegister(RAX), Immediate(load->GetStringIndex().index_));
6262 codegen_->InvokeRuntime(kQuickResolveString,
6263 load,
6264 load->GetDexPc());
6265 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
6266 }
6267
6268 static Address GetExceptionTlsAddress() {
6269 return Address::Absolute(Thread::ExceptionOffset<kX86_64PointerSize>().Int32Value(),
6270 /* no_rip= */ true);
6271 }
6272
6273 void LocationsBuilderX86_64::VisitLoadException(HLoadException* load) {
6274 LocationSummary* locations =
6275 new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall);
6276 locations->SetOut(Location::RequiresRegister());
6277 }
6278
6279 void InstructionCodeGeneratorX86_64::VisitLoadException(HLoadException* load) {
6280 __ gs()->movl(load->GetLocations()->Out().AsRegister<CpuRegister>(), GetExceptionTlsAddress());
6281 }
6282
6283 void LocationsBuilderX86_64::VisitClearException(HClearException* clear) {
6284 new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
6285 }
6286
6287 void InstructionCodeGeneratorX86_64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
6288 __ gs()->movl(GetExceptionTlsAddress(), Immediate(0));
6289 }
6290
6291 void LocationsBuilderX86_64::VisitThrow(HThrow* instruction) {
6292 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6293 instruction, LocationSummary::kCallOnMainOnly);
6294 InvokeRuntimeCallingConvention calling_convention;
6295 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6296 }
6297
6298 void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) {
6299 codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
6300 CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
6301 }
6302
6303 // Temp is used for read barrier.
6304 static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
6305 if (kEmitCompilerReadBarrier &&
6306 !kUseBakerReadBarrier &&
6307 (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
6308 type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
6309 type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
6310 return 1;
6311 }
6312 return 0;
6313 }
6314
6315 // The interface case has 2 temps: one holds the number of interfaces and one the current
6316 // interface pointer; the current interface is compared directly in memory.
6317 // The other checks have one temp for loading the object's class.
6318 static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
6319 if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
6320 return 2;
6321 }
6322 return 1 + NumberOfInstanceOfTemps(type_check_kind);
6323 }
6324
6325 void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
6326 LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
6327 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6328 bool baker_read_barrier_slow_path = false;
6329 switch (type_check_kind) {
6330 case TypeCheckKind::kExactCheck:
6331 case TypeCheckKind::kAbstractClassCheck:
6332 case TypeCheckKind::kClassHierarchyCheck:
6333 case TypeCheckKind::kArrayObjectCheck: {
6334 bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction);
6335 call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
6336 baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier;
6337 break;
6338 }
6339 case TypeCheckKind::kArrayCheck:
6340 case TypeCheckKind::kUnresolvedCheck:
6341 case TypeCheckKind::kInterfaceCheck:
6342 call_kind = LocationSummary::kCallOnSlowPath;
6343 break;
6344 case TypeCheckKind::kBitstringCheck:
6345 break;
6346 }
6347
6348 LocationSummary* locations =
6349 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
6350 if (baker_read_barrier_slow_path) {
6351 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
6352 }
6353 locations->SetInAt(0, Location::RequiresRegister());
6354 if (type_check_kind == TypeCheckKind::kBitstringCheck) {
6355 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
6356 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
6357 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
6358 } else {
6359 locations->SetInAt(1, Location::Any());
6360 }
6361 // Note that TypeCheckSlowPathX86_64 uses this "out" register too.
6362 locations->SetOut(Location::RequiresRegister());
6363 locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
6364 }
6365
6366 void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
6367 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6368 LocationSummary* locations = instruction->GetLocations();
6369 Location obj_loc = locations->InAt(0);
6370 CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
6371 Location cls = locations->InAt(1);
6372 Location out_loc = locations->Out();
6373 CpuRegister out = out_loc.AsRegister<CpuRegister>();
6374 const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
6375 DCHECK_LE(num_temps, 1u);
6376 Location maybe_temp_loc = (num_temps >= 1u) ? locations->GetTemp(0) : Location::NoLocation();
6377 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
6378 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
6379 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
6380 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
6381 SlowPathCode* slow_path = nullptr;
6382 NearLabel done, zero;
6383
6384 // Return 0 if `obj` is null.
6385 // Avoid null check if we know obj is not null.
6386 if (instruction->MustDoNullCheck()) {
6387 __ testl(obj, obj);
6388 __ j(kEqual, &zero);
6389 }
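// From here on, whenever `zero` is linked (i.e. the null check above was emitted), each
// case materializes the 0/1 result with explicit moves and jumps; otherwise it can use a
// branch-free setcc where the comparison allows it.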
6390
6391 switch (type_check_kind) {
6392 case TypeCheckKind::kExactCheck: {
6393 ReadBarrierOption read_barrier_option =
6394 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
6395 // /* HeapReference<Class> */ out = obj->klass_
6396 GenerateReferenceLoadTwoRegisters(instruction,
6397 out_loc,
6398 obj_loc,
6399 class_offset,
6400 read_barrier_option);
6401 if (cls.IsRegister()) {
6402 __ cmpl(out, cls.AsRegister<CpuRegister>());
6403 } else {
6404 DCHECK(cls.IsStackSlot()) << cls;
6405 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
6406 }
6407 if (zero.IsLinked()) {
6408 // Classes must be equal for the instanceof to succeed.
6409 __ j(kNotEqual, &zero);
6410 __ movl(out, Immediate(1));
6411 __ jmp(&done);
6412 } else {
6413 __ setcc(kEqual, out);
6414 // setcc only sets the low byte.
6415 __ andl(out, Immediate(1));
6416 }
6417 break;
6418 }
6419
6420 case TypeCheckKind::kAbstractClassCheck: {
6421 ReadBarrierOption read_barrier_option =
6422 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
6423 // /* HeapReference<Class> */ out = obj->klass_
6424 GenerateReferenceLoadTwoRegisters(instruction,
6425 out_loc,
6426 obj_loc,
6427 class_offset,
6428 read_barrier_option);
6429 // If the class is abstract, we eagerly fetch the super class of the
6430 // object to avoid doing a comparison we know will fail.
6431 NearLabel loop, success;
6432 __ Bind(&loop);
6433 // /* HeapReference<Class> */ out = out->super_class_
6434 GenerateReferenceLoadOneRegister(instruction,
6435 out_loc,
6436 super_offset,
6437 maybe_temp_loc,
6438 read_barrier_option);
6439 __ testl(out, out);
6440 // If `out` is null, we use it for the result, and jump to `done`.
6441 __ j(kEqual, &done);
6442 if (cls.IsRegister()) {
6443 __ cmpl(out, cls.AsRegister<CpuRegister>());
6444 } else {
6445 DCHECK(cls.IsStackSlot()) << cls;
6446 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
6447 }
6448 __ j(kNotEqual, &loop);
6449 __ movl(out, Immediate(1));
6450 if (zero.IsLinked()) {
6451 __ jmp(&done);
6452 }
6453 break;
6454 }
6455
6456 case TypeCheckKind::kClassHierarchyCheck: {
6457 ReadBarrierOption read_barrier_option =
6458 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
6459 // /* HeapReference<Class> */ out = obj->klass_
6460 GenerateReferenceLoadTwoRegisters(instruction,
6461 out_loc,
6462 obj_loc,
6463 class_offset,
6464 read_barrier_option);
6465 // Walk over the class hierarchy to find a match.
6466 NearLabel loop, success;
6467 __ Bind(&loop);
6468 if (cls.IsRegister()) {
6469 __ cmpl(out, cls.AsRegister<CpuRegister>());
6470 } else {
6471 DCHECK(cls.IsStackSlot()) << cls;
6472 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
6473 }
6474 __ j(kEqual, &success);
6475 // /* HeapReference<Class> */ out = out->super_class_
6476 GenerateReferenceLoadOneRegister(instruction,
6477 out_loc,
6478 super_offset,
6479 maybe_temp_loc,
6480 read_barrier_option);
6481 __ testl(out, out);
6482 __ j(kNotEqual, &loop);
6483 // If `out` is null, we use it for the result, and jump to `done`.
6484 __ jmp(&done);
6485 __ Bind(&success);
6486 __ movl(out, Immediate(1));
6487 if (zero.IsLinked()) {
6488 __ jmp(&done);
6489 }
6490 break;
6491 }
6492
6493 case TypeCheckKind::kArrayObjectCheck: {
6494 ReadBarrierOption read_barrier_option =
6495 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
6496 // /* HeapReference<Class> */ out = obj->klass_
6497 GenerateReferenceLoadTwoRegisters(instruction,
6498 out_loc,
6499 obj_loc,
6500 class_offset,
6501 read_barrier_option);
6502 // Do an exact check.
6503 NearLabel exact_check;
6504 if (cls.IsRegister()) {
6505 __ cmpl(out, cls.AsRegister<CpuRegister>());
6506 } else {
6507 DCHECK(cls.IsStackSlot()) << cls;
6508 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
6509 }
6510 __ j(kEqual, &exact_check);
6511 // Otherwise, we need to check that the object's class is a non-primitive array.
6512 // /* HeapReference<Class> */ out = out->component_type_
6513 GenerateReferenceLoadOneRegister(instruction,
6514 out_loc,
6515 component_offset,
6516 maybe_temp_loc,
6517 read_barrier_option);
6518 __ testl(out, out);
6519 // If `out` is null, we use it for the result, and jump to `done`.
6520 __ j(kEqual, &done);
6521 __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot));
6522 __ j(kNotEqual, &zero);
6523 __ Bind(&exact_check);
6524 __ movl(out, Immediate(1));
6525 __ jmp(&done);
6526 break;
6527 }
6528
6529 case TypeCheckKind::kArrayCheck: {
6530 // No read barrier since the slow path will retry upon failure.
6531 // /* HeapReference<Class> */ out = obj->klass_
6532 GenerateReferenceLoadTwoRegisters(instruction,
6533 out_loc,
6534 obj_loc,
6535 class_offset,
6536 kWithoutReadBarrier);
6537 if (cls.IsRegister()) {
6538 __ cmpl(out, cls.AsRegister<CpuRegister>());
6539 } else {
6540 DCHECK(cls.IsStackSlot()) << cls;
6541 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
6542 }
6543 DCHECK(locations->OnlyCallsOnSlowPath());
6544 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
6545 instruction, /* is_fatal= */ false);
6546 codegen_->AddSlowPath(slow_path);
6547 __ j(kNotEqual, slow_path->GetEntryLabel());
6548 __ movl(out, Immediate(1));
6549 if (zero.IsLinked()) {
6550 __ jmp(&done);
6551 }
6552 break;
6553 }
6554
6555 case TypeCheckKind::kUnresolvedCheck:
6556 case TypeCheckKind::kInterfaceCheck: {
6557 // Note that we indeed only call on slow path, but we always go
6558 // into the slow path for the unresolved and interface check
6559 // cases.
6560 //
6561 // We cannot directly call the InstanceofNonTrivial runtime
6562 // entry point without resorting to a type checking slow path
6563 // here (i.e. by calling InvokeRuntime directly), as it would
6564 // require us to assign fixed registers for the inputs of this
6565 // HInstanceOf instruction (following the runtime calling
6566 // convention), which might be cluttered by the potential first
6567 // read barrier emission at the beginning of this method.
6568 //
6569 // TODO: Introduce a new runtime entry point taking the object
6570 // to test (instead of its class) as argument, and let it deal
6571 // with the read barrier issues. This will let us refactor this
6572 // case of the `switch` code as it was previously (with a direct
6573 // call to the runtime not using a type checking slow path).
6574 // This should also be beneficial for the other cases above.
6575 DCHECK(locations->OnlyCallsOnSlowPath());
6576 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
6577 instruction, /* is_fatal= */ false);
6578 codegen_->AddSlowPath(slow_path);
6579 __ jmp(slow_path->GetEntryLabel());
6580 if (zero.IsLinked()) {
6581 __ jmp(&done);
6582 }
6583 break;
6584 }
6585
6586 case TypeCheckKind::kBitstringCheck: {
6587 // /* HeapReference<Class> */ out = obj->klass_
6588 GenerateReferenceLoadTwoRegisters(instruction,
6589 out_loc,
6590 obj_loc,
6591 class_offset,
6592 kWithoutReadBarrier);
6593
6594 GenerateBitstringTypeCheckCompare(instruction, out);
6595 if (zero.IsLinked()) {
6596 __ j(kNotEqual, &zero);
6597 __ movl(out, Immediate(1));
6598 __ jmp(&done);
6599 } else {
6600 __ setcc(kEqual, out);
6601 // setcc only sets the low byte.
6602 __ andl(out, Immediate(1));
6603 }
6604 break;
6605 }
6606 }
6607
6608 if (zero.IsLinked()) {
6609 __ Bind(&zero);
6610 __ xorl(out, out);
6611 }
6612
6613 if (done.IsLinked()) {
6614 __ Bind(&done);
6615 }
6616
6617 if (slow_path != nullptr) {
6618 __ Bind(slow_path->GetExitLabel());
6619 }
6620 }
6621
6622 void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) {
6623 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6624 LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction);
6625 LocationSummary* locations =
6626 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
6627 locations->SetInAt(0, Location::RequiresRegister());
6628 if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
6629 // Require a register for the interface check since there is a loop that compares the class to
6630 // a memory address.
6631 locations->SetInAt(1, Location::RequiresRegister());
6632 } else if (type_check_kind == TypeCheckKind::kBitstringCheck) {
6633 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
6634 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
6635 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
6636 } else {
6637 locations->SetInAt(1, Location::Any());
6638 }
6639 // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathX86_64.
6640 locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
6641 }
6642
6643 void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
6644 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6645 LocationSummary* locations = instruction->GetLocations();
6646 Location obj_loc = locations->InAt(0);
6647 CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
6648 Location cls = locations->InAt(1);
6649 Location temp_loc = locations->GetTemp(0);
6650 CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
6651 const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
6652 DCHECK_GE(num_temps, 1u);
6653 DCHECK_LE(num_temps, 2u);
6654 Location maybe_temp2_loc = (num_temps >= 2u) ? locations->GetTemp(1) : Location::NoLocation();
6655 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
6656 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
6657 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
6658 const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
6659 const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
6660 const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
6661 const uint32_t object_array_data_offset =
6662 mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
6663
6664 bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction);
6665 SlowPathCode* type_check_slow_path =
6666 new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
6667 instruction, is_type_check_slow_path_fatal);
6668 codegen_->AddSlowPath(type_check_slow_path);
6669
6670
6671 NearLabel done;
6672 // Avoid null check if we know obj is not null.
6673 if (instruction->MustDoNullCheck()) {
6674 __ testl(obj, obj);
6675 __ j(kEqual, &done);
6676 }
6677
6678 switch (type_check_kind) {
6679 case TypeCheckKind::kExactCheck:
6680 case TypeCheckKind::kArrayCheck: {
6681 // /* HeapReference<Class> */ temp = obj->klass_
6682 GenerateReferenceLoadTwoRegisters(instruction,
6683 temp_loc,
6684 obj_loc,
6685 class_offset,
6686 kWithoutReadBarrier);
6687 if (cls.IsRegister()) {
6688 __ cmpl(temp, cls.AsRegister<CpuRegister>());
6689 } else {
6690 DCHECK(cls.IsStackSlot()) << cls;
6691 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
6692 }
6693 // Jump to slow path for throwing the exception or doing a
6694 // more involved array check.
6695 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
6696 break;
6697 }
6698
6699 case TypeCheckKind::kAbstractClassCheck: {
6700 // /* HeapReference<Class> */ temp = obj->klass_
6701 GenerateReferenceLoadTwoRegisters(instruction,
6702 temp_loc,
6703 obj_loc,
6704 class_offset,
6705 kWithoutReadBarrier);
6706 // If the class is abstract, we eagerly fetch the super class of the
6707 // object to avoid doing a comparison we know will fail.
6708 NearLabel loop;
6709 __ Bind(&loop);
6710 // /* HeapReference<Class> */ temp = temp->super_class_
6711 GenerateReferenceLoadOneRegister(instruction,
6712 temp_loc,
6713 super_offset,
6714 maybe_temp2_loc,
6715 kWithoutReadBarrier);
6716
6717 // If the class reference currently in `temp` is null, jump to the slow path to throw the
6718 // exception.
6719 __ testl(temp, temp);
6720 // Otherwise, compare the classes.
6721 __ j(kZero, type_check_slow_path->GetEntryLabel());
6722 if (cls.IsRegister()) {
6723 __ cmpl(temp, cls.AsRegister<CpuRegister>());
6724 } else {
6725 DCHECK(cls.IsStackSlot()) << cls;
6726 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
6727 }
6728 __ j(kNotEqual, &loop);
6729 break;
6730 }
6731
6732 case TypeCheckKind::kClassHierarchyCheck: {
6733 // /* HeapReference<Class> */ temp = obj->klass_
6734 GenerateReferenceLoadTwoRegisters(instruction,
6735 temp_loc,
6736 obj_loc,
6737 class_offset,
6738 kWithoutReadBarrier);
6739 // Walk over the class hierarchy to find a match.
6740 NearLabel loop;
6741 __ Bind(&loop);
6742 if (cls.IsRegister()) {
6743 __ cmpl(temp, cls.AsRegister<CpuRegister>());
6744 } else {
6745 DCHECK(cls.IsStackSlot()) << cls;
6746 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
6747 }
6748 __ j(kEqual, &done);
6749
6750 // /* HeapReference<Class> */ temp = temp->super_class_
6751 GenerateReferenceLoadOneRegister(instruction,
6752 temp_loc,
6753 super_offset,
6754 maybe_temp2_loc,
6755 kWithoutReadBarrier);
6756
6757 // If the class reference currently in `temp` is not null, jump
6758 // back at the beginning of the loop.
6759 __ testl(temp, temp);
6760 __ j(kNotZero, &loop);
6761 // Otherwise, jump to the slow path to throw the exception.
6762 __ jmp(type_check_slow_path->GetEntryLabel());
6763 break;
6764 }
6765
6766 case TypeCheckKind::kArrayObjectCheck: {
6767 // /* HeapReference<Class> */ temp = obj->klass_
6768 GenerateReferenceLoadTwoRegisters(instruction,
6769 temp_loc,
6770 obj_loc,
6771 class_offset,
6772 kWithoutReadBarrier);
6773 // Do an exact check.
6774 NearLabel check_non_primitive_component_type;
6775 if (cls.IsRegister()) {
6776 __ cmpl(temp, cls.AsRegister<CpuRegister>());
6777 } else {
6778 DCHECK(cls.IsStackSlot()) << cls;
6779 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
6780 }
6781 __ j(kEqual, &done);
6782
6783 // Otherwise, we need to check that the object's class is a non-primitive array.
6784 // /* HeapReference<Class> */ temp = temp->component_type_
6785 GenerateReferenceLoadOneRegister(instruction,
6786 temp_loc,
6787 component_offset,
6788 maybe_temp2_loc,
6789 kWithoutReadBarrier);
6790
6791 // If the component type is not null (i.e. the object is indeed
6792 // an array), jump to label `check_non_primitive_component_type`
6793 // to further check that this component type is not a primitive
6794 // type.
6795 __ testl(temp, temp);
6796 // Otherwise, jump to the slow path to throw the exception.
6797 __ j(kZero, type_check_slow_path->GetEntryLabel());
6798 __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
6799 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
6800 break;
6801 }
6802
6803 case TypeCheckKind::kUnresolvedCheck: {
6804 // We always go into the type check slow path for the unresolved case.
6805 //
6806 // We cannot directly call the CheckCast runtime entry point
6807 // without resorting to a type checking slow path here (i.e. by
6808 // calling InvokeRuntime directly), as it would require us to
6809 // assign fixed registers for the inputs of this HInstanceOf
6810 // instruction (following the runtime calling convention), which
6811 // might be cluttered by the potential first read barrier
6812 // emission at the beginning of this method.
6813 __ jmp(type_check_slow_path->GetEntryLabel());
6814 break;
6815 }
6816
6817 case TypeCheckKind::kInterfaceCheck: {
6818 // Fast path for the interface check. Try to avoid read barriers to improve the fast path.
6819 // We cannot get false positives by doing this.
6820 // /* HeapReference<Class> */ temp = obj->klass_
6821 GenerateReferenceLoadTwoRegisters(instruction,
6822 temp_loc,
6823 obj_loc,
6824 class_offset,
6825 kWithoutReadBarrier);
6826
6827 // /* HeapReference<Class> */ temp = temp->iftable_
6828 GenerateReferenceLoadTwoRegisters(instruction,
6829 temp_loc,
6830 temp_loc,
6831 iftable_offset,
6832 kWithoutReadBarrier);
6833 // Iftable is never null.
6834 __ movl(maybe_temp2_loc.AsRegister<CpuRegister>(), Address(temp, array_length_offset));
6835 // Maybe poison the `cls` for direct comparison with memory.
6836 __ MaybePoisonHeapReference(cls.AsRegister<CpuRegister>());
6837 // Loop through the iftable and check if any class matches.
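// Each iftable slot is assumed to span two 32-bit heap references (the interface class
// followed by its method array), which is why the counter below steps by 2 and the address
// computation scales it by TIMES_4.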
6838 NearLabel start_loop;
6839 __ Bind(&start_loop);
6840 // Need to subtract first to handle the empty array case.
6841 __ subl(maybe_temp2_loc.AsRegister<CpuRegister>(), Immediate(2));
6842 __ j(kNegative, type_check_slow_path->GetEntryLabel());
6843 // Go to next interface if the classes do not match.
6844 __ cmpl(cls.AsRegister<CpuRegister>(),
6845 CodeGeneratorX86_64::ArrayAddress(temp,
6846 maybe_temp2_loc,
6847 TIMES_4,
6848 object_array_data_offset));
6849 __ j(kNotEqual, &start_loop); // Loop back if the classes do not match; fall through on a match.
6850 // If `cls` was poisoned above, unpoison it.
6851 __ MaybeUnpoisonHeapReference(cls.AsRegister<CpuRegister>());
6852 break;
6853 }
6854
6855 case TypeCheckKind::kBitstringCheck: {
6856 // /* HeapReference<Class> */ temp = obj->klass_
6857 GenerateReferenceLoadTwoRegisters(instruction,
6858 temp_loc,
6859 obj_loc,
6860 class_offset,
6861 kWithoutReadBarrier);
6862
6863 GenerateBitstringTypeCheckCompare(instruction, temp);
6864 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
6865 break;
6866 }
6867 }
6868
6869 if (done.IsLinked()) {
6870 __ Bind(&done);
6871 }
6872
6873 __ Bind(type_check_slow_path->GetExitLabel());
6874 }
6875
6876 void LocationsBuilderX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
6877 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6878 instruction, LocationSummary::kCallOnMainOnly);
6879 InvokeRuntimeCallingConvention calling_convention;
6880 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6881 }
6882
6883 void InstructionCodeGeneratorX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
6884 codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
6885 instruction,
6886 instruction->GetDexPc());
6887 if (instruction->IsEnter()) {
6888 CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
6889 } else {
6890 CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
6891 }
6892 }
6893
6894 void LocationsBuilderX86_64::VisitX86AndNot(HX86AndNot* instruction) {
6895 DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
6896 DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType();
6897 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
6898 locations->SetInAt(0, Location::RequiresRegister());
6899 // There is no immediate variant of the negated bitwise and (andn) on x86.
6900 locations->SetInAt(1, Location::RequiresRegister());
6901 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6902 }
6903
6904 void LocationsBuilderX86_64::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) {
6905 DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
6906 DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType();
6907 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
6908 locations->SetInAt(0, Location::RequiresRegister());
6909 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6910 }
6911
6912 void InstructionCodeGeneratorX86_64::VisitX86AndNot(HX86AndNot* instruction) {
6913 LocationSummary* locations = instruction->GetLocations();
6914 Location first = locations->InAt(0);
6915 Location second = locations->InAt(1);
6916 Location dest = locations->Out();
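// A brief note on what is emitted here: BMI1's andn computes dst = ~src1 & src2 in a
// single non-destructive instruction. It has no immediate form, which matches the
// register-only input locations set up in the locations builder above.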
6917 __ andn(dest.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
6918 }
6919
6920 void InstructionCodeGeneratorX86_64::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) {
6921 LocationSummary* locations = instruction->GetLocations();
6922 Location src = locations->InAt(0);
6923 Location dest = locations->Out();
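// For reference (standard BMI1 semantics): blsr computes x & (x - 1), i.e. it resets
// the lowest set bit; blsmsk computes x ^ (x - 1), i.e. it produces a mask up to and
// including the lowest set bit.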
6924 switch (instruction->GetOpKind()) {
6925 case HInstruction::kAnd:
6926 __ blsr(dest.AsRegister<CpuRegister>(), src.AsRegister<CpuRegister>());
6927 break;
6928 case HInstruction::kXor:
6929 __ blsmsk(dest.AsRegister<CpuRegister>(), src.AsRegister<CpuRegister>());
6930 break;
6931 default:
6932 LOG(FATAL) << "Unreachable";
6933 }
6934 }
6935
6936 void LocationsBuilderX86_64::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
6937 void LocationsBuilderX86_64::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); }
6938 void LocationsBuilderX86_64::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); }
6939
6940 void LocationsBuilderX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
6941 LocationSummary* locations =
6942 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
6943 DCHECK(instruction->GetResultType() == DataType::Type::kInt32
6944 || instruction->GetResultType() == DataType::Type::kInt64);
6945 locations->SetInAt(0, Location::RequiresRegister());
6946 locations->SetInAt(1, Location::Any());
6947 locations->SetOut(Location::SameAsFirstInput());
6948 }
6949
6950 void InstructionCodeGeneratorX86_64::VisitAnd(HAnd* instruction) {
6951 HandleBitwiseOperation(instruction);
6952 }
6953
6954 void InstructionCodeGeneratorX86_64::VisitOr(HOr* instruction) {
6955 HandleBitwiseOperation(instruction);
6956 }
6957
6958 void InstructionCodeGeneratorX86_64::VisitXor(HXor* instruction) {
6959 HandleBitwiseOperation(instruction);
6960 }
6961
6962 void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
6963 LocationSummary* locations = instruction->GetLocations();
6964 Location first = locations->InAt(0);
6965 Location second = locations->InAt(1);
6966 DCHECK(first.Equals(locations->Out()));
6967
6968 if (instruction->GetResultType() == DataType::Type::kInt32) {
6969 if (second.IsRegister()) {
6970 if (instruction->IsAnd()) {
6971 __ andl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
6972 } else if (instruction->IsOr()) {
6973 __ orl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
6974 } else {
6975 DCHECK(instruction->IsXor());
6976 __ xorl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
6977 }
6978 } else if (second.IsConstant()) {
6979 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
6980 if (instruction->IsAnd()) {
6981 __ andl(first.AsRegister<CpuRegister>(), imm);
6982 } else if (instruction->IsOr()) {
6983 __ orl(first.AsRegister<CpuRegister>(), imm);
6984 } else {
6985 DCHECK(instruction->IsXor());
6986 __ xorl(first.AsRegister<CpuRegister>(), imm);
6987 }
6988 } else {
6989 Address address(CpuRegister(RSP), second.GetStackIndex());
6990 if (instruction->IsAnd()) {
6991 __ andl(first.AsRegister<CpuRegister>(), address);
6992 } else if (instruction->IsOr()) {
6993 __ orl(first.AsRegister<CpuRegister>(), address);
6994 } else {
6995 DCHECK(instruction->IsXor());
6996 __ xorl(first.AsRegister<CpuRegister>(), address);
6997 }
6998 }
6999 } else {
7000 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
7001 CpuRegister first_reg = first.AsRegister<CpuRegister>();
7002 bool second_is_constant = false;
7003 int64_t value = 0;
7004 if (second.IsConstant()) {
7005 second_is_constant = true;
7006 value = second.GetConstant()->AsLongConstant()->GetValue();
7007 }
7008 bool is_int32_value = IsInt<32>(value);
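// and/or/xor on x86-64 only accept sign-extended 32-bit immediates, so constants that
// do not fit are materialized through the RIP-relative constant area instead.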
7009
7010 if (instruction->IsAnd()) {
7011 if (second_is_constant) {
7012 if (is_int32_value) {
7013 __ andq(first_reg, Immediate(static_cast<int32_t>(value)));
7014 } else {
7015 __ andq(first_reg, codegen_->LiteralInt64Address(value));
7016 }
7017 } else if (second.IsDoubleStackSlot()) {
7018 __ andq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
7019 } else {
7020 __ andq(first_reg, second.AsRegister<CpuRegister>());
7021 }
7022 } else if (instruction->IsOr()) {
7023 if (second_is_constant) {
7024 if (is_int32_value) {
7025 __ orq(first_reg, Immediate(static_cast<int32_t>(value)));
7026 } else {
7027 __ orq(first_reg, codegen_->LiteralInt64Address(value));
7028 }
7029 } else if (second.IsDoubleStackSlot()) {
7030 __ orq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
7031 } else {
7032 __ orq(first_reg, second.AsRegister<CpuRegister>());
7033 }
7034 } else {
7035 DCHECK(instruction->IsXor());
7036 if (second_is_constant) {
7037 if (is_int32_value) {
7038 __ xorq(first_reg, Immediate(static_cast<int32_t>(value)));
7039 } else {
7040 __ xorq(first_reg, codegen_->LiteralInt64Address(value));
7041 }
7042 } else if (second.IsDoubleStackSlot()) {
7043 __ xorq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
7044 } else {
7045 __ xorq(first_reg, second.AsRegister<CpuRegister>());
7046 }
7047 }
7048 }
7049 }
7050
7051 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(
7052 HInstruction* instruction,
7053 Location out,
7054 uint32_t offset,
7055 Location maybe_temp,
7056 ReadBarrierOption read_barrier_option) {
7057 CpuRegister out_reg = out.AsRegister<CpuRegister>();
7058 if (read_barrier_option == kWithReadBarrier) {
7059 CHECK(kEmitCompilerReadBarrier);
7060 if (kUseBakerReadBarrier) {
7061 // Load with fast path based Baker's read barrier.
7062 // /* HeapReference<Object> */ out = *(out + offset)
7063 codegen_->GenerateFieldLoadWithBakerReadBarrier(
7064 instruction, out, out_reg, offset, /* needs_null_check= */ false);
7065 } else {
7066 // Load with slow path based read barrier.
7067 // Save the value of `out` into `maybe_temp` before overwriting it
7068 // in the following move operation, as we will need it for the
7069 // read barrier below.
7070 DCHECK(maybe_temp.IsRegister()) << maybe_temp;
7071 __ movl(maybe_temp.AsRegister<CpuRegister>(), out_reg);
7072 // /* HeapReference<Object> */ out = *(out + offset)
7073 __ movl(out_reg, Address(out_reg, offset));
7074 codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
7075 }
7076 } else {
7077 // Plain load with no read barrier.
7078 // /* HeapReference<Object> */ out = *(out + offset)
7079 __ movl(out_reg, Address(out_reg, offset));
7080 __ MaybeUnpoisonHeapReference(out_reg);
7081 }
7082 }
7083
7084 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(
7085 HInstruction* instruction,
7086 Location out,
7087 Location obj,
7088 uint32_t offset,
7089 ReadBarrierOption read_barrier_option) {
7090 CpuRegister out_reg = out.AsRegister<CpuRegister>();
7091 CpuRegister obj_reg = obj.AsRegister<CpuRegister>();
7092 if (read_barrier_option == kWithReadBarrier) {
7093 CHECK(kEmitCompilerReadBarrier);
7094 if (kUseBakerReadBarrier) {
7095 // Load with fast path based Baker's read barrier.
7096 // /* HeapReference<Object> */ out = *(obj + offset)
7097 codegen_->GenerateFieldLoadWithBakerReadBarrier(
7098 instruction, out, obj_reg, offset, /* needs_null_check= */ false);
7099 } else {
7100 // Load with slow path based read barrier.
7101 // /* HeapReference<Object> */ out = *(obj + offset)
7102 __ movl(out_reg, Address(obj_reg, offset));
7103 codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
7104 }
7105 } else {
7106 // Plain load with no read barrier.
7107 // /* HeapReference<Object> */ out = *(obj + offset)
7108 __ movl(out_reg, Address(obj_reg, offset));
7109 __ MaybeUnpoisonHeapReference(out_reg);
7110 }
7111 }
7112
7113 void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(
7114 HInstruction* instruction,
7115 Location root,
7116 const Address& address,
7117 Label* fixup_label,
7118 ReadBarrierOption read_barrier_option) {
7119 CpuRegister root_reg = root.AsRegister<CpuRegister>();
7120 if (read_barrier_option == kWithReadBarrier) {
7121 DCHECK(kEmitCompilerReadBarrier);
7122 if (kUseBakerReadBarrier) {
7123 // Fast path implementation of art::ReadBarrier::BarrierForRoot when
7124 // Baker's read barriers are used:
7125 //
7126 // root = obj.field;
7127 // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
7128 // if (temp != null) {
7129 // root = temp(root)
7130 // }
7131
7132 // /* GcRoot<mirror::Object> */ root = *address
7133 __ movl(root_reg, address);
7134 if (fixup_label != nullptr) {
7135 __ Bind(fixup_label);
7136 }
7137 static_assert(
7138 sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
7139 "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
7140 "have different sizes.");
7141 static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
7142 "art::mirror::CompressedReference<mirror::Object> and int32_t "
7143 "have different sizes.");
7144
7145 // Slow path marking the GC root `root`.
7146 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathX86_64(
7147 instruction, root, /* unpoison_ref_before_marking= */ false);
7148 codegen_->AddSlowPath(slow_path);
7149
7150 // Test the `Thread::Current()->pReadBarrierMarkReg ## root.reg()` entrypoint.
7151 const int32_t entry_point_offset =
7152 Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(root.reg());
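// There is one marking entrypoint per core register; indexing by root.reg() means the
// slow path can receive and return the root in that same register (this mirrors the
// per-register pReadBarrierMarkReg entrypoints mentioned in the comment above).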
7153 __ gs()->cmpl(Address::Absolute(entry_point_offset, /* no_rip= */ true), Immediate(0));
7154 // The entrypoint is null when the GC is not marking.
7155 __ j(kNotEqual, slow_path->GetEntryLabel());
7156 __ Bind(slow_path->GetExitLabel());
7157 } else {
7158 // GC root loaded through a slow path for read barriers other
7159 // than Baker's.
7160 // /* GcRoot<mirror::Object>* */ root = address
7161 __ leaq(root_reg, address);
7162 if (fixup_label != nullptr) {
7163 __ Bind(fixup_label);
7164 }
7165 // /* mirror::Object* */ root = root->Read()
7166 codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
7167 }
7168 } else {
7169 // Plain GC root load with no read barrier.
7170 // /* GcRoot<mirror::Object> */ root = *address
7171 __ movl(root_reg, address);
7172 if (fixup_label != nullptr) {
7173 __ Bind(fixup_label);
7174 }
7175 // Note that GC roots are not affected by heap poisoning, thus we
7176 // do not have to unpoison `root_reg` here.
7177 }
7178 }
7179
7180 void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
7181 Location ref,
7182 CpuRegister obj,
7183 uint32_t offset,
7184 bool needs_null_check) {
7185 DCHECK(kEmitCompilerReadBarrier);
7186 DCHECK(kUseBakerReadBarrier);
7187
7188 // /* HeapReference<Object> */ ref = *(obj + offset)
7189 Address src(obj, offset);
7190 GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
7191 }
7192
7193 void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
7194 Location ref,
7195 CpuRegister obj,
7196 uint32_t data_offset,
7197 Location index,
7198 bool needs_null_check) {
7199 DCHECK(kEmitCompilerReadBarrier);
7200 DCHECK(kUseBakerReadBarrier);
7201
7202 static_assert(
7203 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
7204 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
7205 // /* HeapReference<Object> */ ref =
7206 // *(obj + data_offset + index * sizeof(HeapReference<Object>))
7207 Address src = CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset);
7208 GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
7209 }
7210
7211 void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
7212 Location ref,
7213 CpuRegister obj,
7214 const Address& src,
7215 bool needs_null_check,
7216 bool always_update_field,
7217 CpuRegister* temp1,
7218 CpuRegister* temp2) {
7219 DCHECK(kEmitCompilerReadBarrier);
7220 DCHECK(kUseBakerReadBarrier);
7221
7222 // In slow path based read barriers, the read barrier call is
7223 // inserted after the original load. However, in fast path based
7224 // Baker's read barriers, we need to perform the load of
7225 // mirror::Object::monitor_ *before* the original reference load.
7226 // This load-load ordering is required by the read barrier.
7227 // The fast path/slow path (for Baker's algorithm) should look like:
7228 //
7229 // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
7230 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
7231 // HeapReference<Object> ref = *src; // Original reference load.
7232 // bool is_gray = (rb_state == ReadBarrier::GrayState());
7233 // if (is_gray) {
7234 // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path.
7235 // }
7236 //
7237 // Note: the original implementation in ReadBarrier::Barrier is
7238 // slightly more complex as:
7239 // - it implements the load-load fence using a data dependency on
7240 // the high-bits of rb_state, which are expected to be all zeroes
7241 // (we use CodeGeneratorX86_64::GenerateMemoryBarrier instead
7242 // here, which is a no-op thanks to the x86-64 memory model);
7243 // - it performs additional checks that we do not do here for
7244 // performance reasons.
7245
7246 CpuRegister ref_reg = ref.AsRegister<CpuRegister>();
7247 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
7248
7249 // Given the numeric representation, it's enough to check the low bit of the rb_state.
7250 static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
7251 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
7252 constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
7253 constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
7254 constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
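// The gray bit is tested directly in memory with a byte-wide testb below, so the lock
// word itself never needs to be loaded into a register.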
7255
7256 // if (rb_state == ReadBarrier::GrayState())
7257 // ref = ReadBarrier::Mark(ref);
7258 // At this point, just do the "if" and make sure that flags are preserved until the branch.
7259 __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
7260 if (needs_null_check) {
7261 MaybeRecordImplicitNullCheck(instruction);
7262 }
7263
7264 // Load fence to prevent load-load reordering.
7265 // Note that this is a no-op, thanks to the x86-64 memory model.
7266 GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
7267
7268 // The actual reference load.
7269 // /* HeapReference<Object> */ ref = *src
7270 __ movl(ref_reg, src); // Flags are unaffected.
7271
7272 // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch.
7273 // Slow path marking the object `ref` when it is gray.
7274 SlowPathCode* slow_path;
7275 if (always_update_field) {
7276 DCHECK(temp1 != nullptr);
7277 DCHECK(temp2 != nullptr);
7278 slow_path = new (GetScopedAllocator()) ReadBarrierMarkAndUpdateFieldSlowPathX86_64(
7279 instruction, ref, obj, src, /* unpoison_ref_before_marking= */ true, *temp1, *temp2);
7280 } else {
7281 slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathX86_64(
7282 instruction, ref, /* unpoison_ref_before_marking= */ true);
7283 }
7284 AddSlowPath(slow_path);
7285
7286 // We have done the "if" of the gray bit check above, now branch based on the flags.
7287 __ j(kNotZero, slow_path->GetEntryLabel());
7288
7289 // Object* ref = ref_addr->AsMirrorPtr()
7290 __ MaybeUnpoisonHeapReference(ref_reg);
7291
7292 __ Bind(slow_path->GetExitLabel());
7293 }
7294
7295 void CodeGeneratorX86_64::GenerateReadBarrierSlow(HInstruction* instruction,
7296 Location out,
7297 Location ref,
7298 Location obj,
7299 uint32_t offset,
7300 Location index) {
7301 DCHECK(kEmitCompilerReadBarrier);
7302
7303 // Insert a slow path based read barrier *after* the reference load.
7304 //
7305 // If heap poisoning is enabled, the unpoisoning of the loaded
7306 // reference will be carried out by the runtime within the slow
7307 // path.
7308 //
7309 // Note that `ref` currently does not get unpoisoned (when heap
7310 // poisoning is enabled), which is alright as the `ref` argument is
7311 // not used by the artReadBarrierSlow entry point.
7312 //
7313 // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
7314 SlowPathCode* slow_path = new (GetScopedAllocator())
7315 ReadBarrierForHeapReferenceSlowPathX86_64(instruction, out, ref, obj, offset, index);
7316 AddSlowPath(slow_path);
7317
7318 __ jmp(slow_path->GetEntryLabel());
7319 __ Bind(slow_path->GetExitLabel());
7320 }
7321
7322 void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
7323 Location out,
7324 Location ref,
7325 Location obj,
7326 uint32_t offset,
7327 Location index) {
7328 if (kEmitCompilerReadBarrier) {
7329 // Baker's read barriers shall be handled by the fast path
7330 // (CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier).
7331 DCHECK(!kUseBakerReadBarrier);
7332 // If heap poisoning is enabled, unpoisoning will be taken care of
7333 // by the runtime within the slow path.
7334 GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
7335 } else if (kPoisonHeapReferences) {
7336 __ UnpoisonHeapReference(out.AsRegister<CpuRegister>());
7337 }
7338 }
7339
7340 void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
7341 Location out,
7342 Location root) {
7343 DCHECK(kEmitCompilerReadBarrier);
7344
7345 // Insert a slow path based read barrier *after* the GC root load.
7346 //
7347 // Note that GC roots are not affected by heap poisoning, so we do
7348 // not need to do anything special for this here.
7349 SlowPathCode* slow_path =
7350 new (GetScopedAllocator()) ReadBarrierForRootSlowPathX86_64(instruction, out, root);
7351 AddSlowPath(slow_path);
7352
7353 __ jmp(slow_path->GetEntryLabel());
7354 __ Bind(slow_path->GetExitLabel());
7355 }
7356
7357 void LocationsBuilderX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
7358 // Nothing to do, this should be removed during prepare for register allocator.
7359 LOG(FATAL) << "Unreachable";
7360 }
7361
7362 void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
7363 // Nothing to do, this should be removed during prepare for register allocator.
7364 LOG(FATAL) << "Unreachable";
7365 }
7366
7367 // Simple implementation of packed switch - generate cascaded compare/jumps.
7368 void LocationsBuilderX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
7369 LocationSummary* locations =
7370 new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
7371 locations->SetInAt(0, Location::RequiresRegister());
7372 locations->AddTemp(Location::RequiresRegister());
7373 locations->AddTemp(Location::RequiresRegister());
7374 }
7375
7376 void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
7377 int32_t lower_bound = switch_instr->GetStartValue();
7378 uint32_t num_entries = switch_instr->GetNumEntries();
7379 LocationSummary* locations = switch_instr->GetLocations();
7380 CpuRegister value_reg_in = locations->InAt(0).AsRegister<CpuRegister>();
7381 CpuRegister temp_reg = locations->GetTemp(0).AsRegister<CpuRegister>();
7382 CpuRegister base_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
7383 HBasicBlock* default_block = switch_instr->GetDefaultBlock();
7384
7385 // Should we generate smaller inline compare/jumps?
7386 if (num_entries <= kPackedSwitchJumpTableThreshold) {
7387 // Figure out the correct compare values and jump conditions.
7388 // Handle the first compare/branch as a special case because it might
7389 // jump to the default case.
7390 DCHECK_GT(num_entries, 2u);
7391 Condition first_condition;
7392 uint32_t index;
7393 const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
7394 if (lower_bound != 0) {
7395 first_condition = kLess;
7396 __ cmpl(value_reg_in, Immediate(lower_bound));
7397 __ j(first_condition, codegen_->GetLabelOf(default_block));
7398 __ j(kEqual, codegen_->GetLabelOf(successors[0]));
7399
7400 index = 1;
7401 } else {
7402 // Handle all the compare/jumps below.
7403 first_condition = kBelow;
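// The unsigned kBelow presumably doubles as the lower-bound check here: negative
// inputs compare as large unsigned values, match no case below, and fall through to
// the default jump.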
7404 index = 0;
7405 }
7406
7407 // Handle the rest of the compare/jumps.
7408 for (; index + 1 < num_entries; index += 2) {
7409 int32_t compare_to_value = lower_bound + index + 1;
7410 __ cmpl(value_reg_in, Immediate(compare_to_value));
7411 // Jump to successors[index] if value < case_value[index].
7412 __ j(first_condition, codegen_->GetLabelOf(successors[index]));
7413 // Jump to successors[index + 1] if value == case_value[index + 1].
7414 __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
7415 }
7416
7417 if (index != num_entries) {
7418 // There are an odd number of entries. Handle the last one.
7419 DCHECK_EQ(index + 1, num_entries);
7420 __ cmpl(value_reg_in, Immediate(static_cast<int32_t>(lower_bound + index)));
7421 __ j(kEqual, codegen_->GetLabelOf(successors[index]));
7422 }
7423
7424 // And the default for any other value.
7425 if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
7426 __ jmp(codegen_->GetLabelOf(default_block));
7427 }
7428 return;
7429 }
7430
7431 // Remove the bias, if needed.
7432 Register value_reg_out = value_reg_in.AsRegister();
7433 if (lower_bound != 0) {
7434 __ leal(temp_reg, Address(value_reg_in, -lower_bound));
7435 value_reg_out = temp_reg.AsRegister();
7436 }
7437 CpuRegister value_reg(value_reg_out);
7438
7439 // Is the value in range?
7440 __ cmpl(value_reg, Immediate(num_entries - 1));
7441 __ j(kAbove, codegen_->GetLabelOf(default_block));
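// Note that the unsigned kAbove test also rejects values below the range: after the
// bias is removed they wrap around to large unsigned numbers and take the default.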
7442
7443 // We are in the range of the table.
7444 // Load the address of the jump table in the constant area.
7445 __ leaq(base_reg, codegen_->LiteralCaseTable(switch_instr));
7446
7447 // Load the (signed) offset from the jump table.
7448 __ movsxd(temp_reg, Address(base_reg, value_reg, TIMES_4, 0));
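// Each table entry is a signed 32-bit offset from the start of the table (see
// JumpTableRIPFixup::CreateJumpTable below), hence the sign-extending load.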
7449
7450 // Add the offset to the address of the table base.
7451 __ addq(temp_reg, base_reg);
7452
7453 // And jump.
7454 __ jmp(temp_reg);
7455 }
7456
7457 void LocationsBuilderX86_64::VisitIntermediateAddress(HIntermediateAddress* instruction
7458 ATTRIBUTE_UNUSED) {
7459 LOG(FATAL) << "Unreachable";
7460 }
7461
7462 void InstructionCodeGeneratorX86_64::VisitIntermediateAddress(HIntermediateAddress* instruction
7463 ATTRIBUTE_UNUSED) {
7464 LOG(FATAL) << "Unreachable";
7465 }
7466
7467 void CodeGeneratorX86_64::Load32BitValue(CpuRegister dest, int32_t value) {
7468 if (value == 0) {
7469 __ xorl(dest, dest);
7470 } else {
7471 __ movl(dest, Immediate(value));
7472 }
7473 }
7474
7475 void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) {
7476 if (value == 0) {
7477 // Clears upper bits too.
7478 __ xorl(dest, dest);
7479 } else if (IsUint<32>(value)) {
7480 // We can use a 32 bit move, as it will zero-extend and is shorter.
7481 __ movl(dest, Immediate(static_cast<int32_t>(value)));
7482 } else {
7483 __ movq(dest, Immediate(value));
7484 }
7485 }
7486
7487 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, int32_t value) {
7488 if (value == 0) {
7489 __ xorps(dest, dest);
7490 } else {
7491 __ movss(dest, LiteralInt32Address(value));
7492 }
7493 }
7494
7495 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, int64_t value) {
7496 if (value == 0) {
7497 __ xorpd(dest, dest);
7498 } else {
7499 __ movsd(dest, LiteralInt64Address(value));
7500 }
7501 }
7502
7503 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, float value) {
7504 Load32BitValue(dest, bit_cast<int32_t, float>(value));
7505 }
7506
7507 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, double value) {
7508 Load64BitValue(dest, bit_cast<int64_t, double>(value));
7509 }
7510
7511 void CodeGeneratorX86_64::Compare32BitValue(CpuRegister dest, int32_t value) {
7512 if (value == 0) {
7513 __ testl(dest, dest);
7514 } else {
7515 __ cmpl(dest, Immediate(value));
7516 }
7517 }
7518
7519 void CodeGeneratorX86_64::Compare64BitValue(CpuRegister dest, int64_t value) {
7520 if (IsInt<32>(value)) {
7521 if (value == 0) {
7522 __ testq(dest, dest);
7523 } else {
7524 __ cmpq(dest, Immediate(static_cast<int32_t>(value)));
7525 }
7526 } else {
7527 // Value won't fit in an int.
7528 __ cmpq(dest, LiteralInt64Address(value));
7529 }
7530 }
7531
7532 void CodeGeneratorX86_64::GenerateIntCompare(Location lhs, Location rhs) {
7533 CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>();
7534 GenerateIntCompare(lhs_reg, rhs);
7535 }
7536
7537 void CodeGeneratorX86_64::GenerateIntCompare(CpuRegister lhs, Location rhs) {
7538 if (rhs.IsConstant()) {
7539 int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
7540 Compare32BitValue(lhs, value);
7541 } else if (rhs.IsStackSlot()) {
7542 __ cmpl(lhs, Address(CpuRegister(RSP), rhs.GetStackIndex()));
7543 } else {
7544 __ cmpl(lhs, rhs.AsRegister<CpuRegister>());
7545 }
7546 }
7547
7548 void CodeGeneratorX86_64::GenerateLongCompare(Location lhs, Location rhs) {
7549 CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>();
7550 if (rhs.IsConstant()) {
7551 int64_t value = rhs.GetConstant()->AsLongConstant()->GetValue();
7552 Compare64BitValue(lhs_reg, value);
7553 } else if (rhs.IsDoubleStackSlot()) {
7554 __ cmpq(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
7555 } else {
7556 __ cmpq(lhs_reg, rhs.AsRegister<CpuRegister>());
7557 }
7558 }
7559
7560 Address CodeGeneratorX86_64::ArrayAddress(CpuRegister obj,
7561 Location index,
7562 ScaleFactor scale,
7563 uint32_t data_offset) {
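// For a constant index the scaled element offset folds into the displacement;
// otherwise a scaled-index addressing mode is used.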
7564 return index.IsConstant() ?
7565 Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) :
7566 Address(obj, index.AsRegister<CpuRegister>(), scale, data_offset);
7567 }
7568
7569 void CodeGeneratorX86_64::Store64BitValueToStack(Location dest, int64_t value) {
7570 DCHECK(dest.IsDoubleStackSlot());
7571 if (IsInt<32>(value)) {
7572 // Can move directly as an int32 constant.
7573 __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()),
7574 Immediate(static_cast<int32_t>(value)));
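// movq with an immediate sign-extends the 32-bit value, which is why this single store
// is only used when the value fits in an int32 (checked above).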
7575 } else {
7576 Load64BitValue(CpuRegister(TMP), value);
7577 __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()), CpuRegister(TMP));
7578 }
7579 }
7580
7581 /**
7582 * Class to handle late fixup of offsets into constant area.
7583 */
7584 class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
7585 public:
7586 RIPFixup(CodeGeneratorX86_64& codegen, size_t offset)
7587 : codegen_(&codegen), offset_into_constant_area_(offset) {}
7588
7589 protected:
7590 void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }
7591
7592 CodeGeneratorX86_64* codegen_;
7593
7594 private:
7595 void Process(const MemoryRegion& region, int pos) override {
7596 // Patch the correct offset for the instruction. We use the address of the
7597 // 'next' instruction, which is 'pos' (patch the 4 bytes before).
7598 int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
7599 int32_t relative_position = constant_offset - pos;
7600
7601 // Patch in the right value.
7602 region.StoreUnaligned<int32_t>(pos - 4, relative_position);
7603 }
7604
7605 // Location in constant area that the fixup refers to.
7606 size_t offset_into_constant_area_;
7607 };
7608
7609 /**
7610 * Class to handle late fixup of offsets to a jump table that will be created in the
7611 * constant area.
7612 */
7613 class JumpTableRIPFixup : public RIPFixup {
7614 public:
7615 JumpTableRIPFixup(CodeGeneratorX86_64& codegen, HPackedSwitch* switch_instr)
7616 : RIPFixup(codegen, -1), switch_instr_(switch_instr) {}
7617
7618 void CreateJumpTable() {
7619 X86_64Assembler* assembler = codegen_->GetAssembler();
7620
7621 // Ensure that the reference to the jump table has the correct offset.
7622 const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
7623 SetOffset(offset_in_constant_table);
7624
7625 // Compute the offset from the start of the function to this jump table.
7626 const int32_t current_table_offset = assembler->CodeSize() + offset_in_constant_table;
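// At this point CodeSize() is where the constant area will start (the jump tables are
// created from Finalize() after alignment, before AddConstantArea()), so this is the
// table's eventual offset from the method entry point.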
7627
7628 // Populate the jump table with the offset of each case's target block.
7629 int32_t num_entries = switch_instr_->GetNumEntries();
7630 HBasicBlock* block = switch_instr_->GetBlock();
7631 const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
7632 // The value that we want is the target offset - the position of the table.
7633 for (int32_t i = 0; i < num_entries; i++) {
7634 HBasicBlock* b = successors[i];
7635 Label* l = codegen_->GetLabelOf(b);
7636 DCHECK(l->IsBound());
7637 int32_t offset_to_block = l->Position() - current_table_offset;
7638 assembler->AppendInt32(offset_to_block);
7639 }
7640 }
7641
7642 private:
7643 const HPackedSwitch* switch_instr_;
7644 };
7645
7646 void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) {
7647 // Generate the constant area if needed.
7648 X86_64Assembler* assembler = GetAssembler();
7649 if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
7650 // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8 byte values.
7651 assembler->Align(4, 0);
7652 constant_area_start_ = assembler->CodeSize();
7653
7654 // Populate any jump tables.
7655 for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) {
7656 jump_table->CreateJumpTable();
7657 }
7658
7659 // And now add the constant area to the generated code.
7660 assembler->AddConstantArea();
7661 }
7662
7663 // And finish up.
7664 CodeGenerator::Finalize(allocator);
7665 }
7666
7667 Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) {
7668 AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddDouble(v));
7669 return Address::RIP(fixup);
7670 }
7671
7672 Address CodeGeneratorX86_64::LiteralFloatAddress(float v) {
7673 AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddFloat(v));
7674 return Address::RIP(fixup);
7675 }
7676
7677 Address CodeGeneratorX86_64::LiteralInt32Address(int32_t v) {
7678 AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddInt32(v));
7679 return Address::RIP(fixup);
7680 }
7681
7682 Address CodeGeneratorX86_64::LiteralInt64Address(int64_t v) {
7683 AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddInt64(v));
7684 return Address::RIP(fixup);
7685 }
7686
7687 // TODO: trg as memory.
7688 void CodeGeneratorX86_64::MoveFromReturnRegister(Location trg, DataType::Type type) {
7689 if (!trg.IsValid()) {
7690 DCHECK_EQ(type, DataType::Type::kVoid);
7691 return;
7692 }
7693
7694 DCHECK_NE(type, DataType::Type::kVoid);
7695
7696 Location return_loc = InvokeDexCallingConventionVisitorX86_64().GetReturnLocation(type);
7697 if (trg.Equals(return_loc)) {
7698 return;
7699 }
7700
7701 // Let the parallel move resolver take care of all of this.
7702 HParallelMove parallel_move(GetGraph()->GetAllocator());
7703 parallel_move.AddMove(return_loc, trg, type, nullptr);
7704 GetMoveResolver()->EmitNativeCode(&parallel_move);
7705 }
7706
7707 Address CodeGeneratorX86_64::LiteralCaseTable(HPackedSwitch* switch_instr) {
7708 // Create a fixup to be used to create and address the jump table.
7709 JumpTableRIPFixup* table_fixup =
7710 new (GetGraph()->GetAllocator()) JumpTableRIPFixup(*this, switch_instr);
7711
7712 // We have to populate the jump tables.
7713 fixups_to_jump_tables_.push_back(table_fixup);
7714 return Address::RIP(table_fixup);
7715 }
7716
7717 void CodeGeneratorX86_64::MoveInt64ToAddress(const Address& addr_low,
7718 const Address& addr_high,
7719 int64_t v,
7720 HInstruction* instruction) {
7721 if (IsInt<32>(v)) {
7722 int32_t v_32 = v;
7723 __ movq(addr_low, Immediate(v_32));
7724 MaybeRecordImplicitNullCheck(instruction);
7725 } else {
7726 // The value does not fit in a sign-extended 32-bit immediate. Store it in two 32-bit pieces.
7727 int32_t low_v = Low32Bits(v);
7728 int32_t high_v = High32Bits(v);
7729 __ movl(addr_low, Immediate(low_v));
7730 MaybeRecordImplicitNullCheck(instruction);
7731 __ movl(addr_high, Immediate(high_v));
7732 }
7733 }
7734
7735 void CodeGeneratorX86_64::PatchJitRootUse(uint8_t* code,
7736 const uint8_t* roots_data,
7737 const PatchInfo<Label>& info,
7738 uint64_t index_in_table) const {
7739 uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
7740 uintptr_t address =
7741 reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
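// The 4 bytes being patched hold the 32-bit address through which the GC root is
// loaded; the checked cast below assumes the JIT roots data is addressable in 32 bits.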
7742 using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t;
7743 reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
7744 dchecked_integral_cast<uint32_t>(address);
7745 }
7746
7747 void CodeGeneratorX86_64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
7748 for (const PatchInfo<Label>& info : jit_string_patches_) {
7749 StringReference string_reference(info.target_dex_file, dex::StringIndex(info.offset_or_index));
7750 uint64_t index_in_table = GetJitStringRootIndex(string_reference);
7751 PatchJitRootUse(code, roots_data, info, index_in_table);
7752 }
7753
7754 for (const PatchInfo<Label>& info : jit_class_patches_) {
7755 TypeReference type_reference(info.target_dex_file, dex::TypeIndex(info.offset_or_index));
7756 uint64_t index_in_table = GetJitClassRootIndex(type_reference);
7757 PatchJitRootUse(code, roots_data, info, index_in_table);
7758 }
7759 }
7760
7761 bool LocationsBuilderX86_64::CpuHasAvxFeatureFlag() {
7762 return codegen_->GetInstructionSetFeatures().HasAVX();
7763 }
7764
7765 bool LocationsBuilderX86_64::CpuHasAvx2FeatureFlag() {
7766 return codegen_->GetInstructionSetFeatures().HasAVX2();
7767 }
7768
7769 bool InstructionCodeGeneratorX86_64::CpuHasAvxFeatureFlag() {
7770 return codegen_->GetInstructionSetFeatures().HasAVX();
7771 }
7772
7773 bool InstructionCodeGeneratorX86_64::CpuHasAvx2FeatureFlag() {
7774 return codegen_->GetInstructionSetFeatures().HasAVX2();
7775 }
7776
7777 #undef __
7778
7779 } // namespace x86_64
7780 } // namespace art
7781