/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "code_generator_x86_64.h"

#include "arch/x86_64/jni_frame_x86_64.h"
#include "art_method-inl.h"
#include "class_table.h"
#include "code_generator_utils.h"
#include "compiled_method.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "gc/accounting/card_table.h"
#include "gc/space/image_space.h"
#include "heap_poisoning.h"
#include "interpreter/mterp/nterp.h"
#include "intrinsics.h"
#include "intrinsics_x86_64.h"
#include "jit/profiling_info.h"
#include "linker/linker_patch.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
#include "mirror/object_reference.h"
#include "scoped_thread_state_change-inl.h"
#include "thread.h"
#include "utils/assembler.h"
#include "utils/stack_checks.h"
#include "utils/x86_64/assembler_x86_64.h"
#include "utils/x86_64/constants_x86_64.h"
#include "utils/x86_64/managed_register_x86_64.h"

namespace art {

template<class MirrorType>
class GcRoot;

namespace x86_64 {

static constexpr int kCurrentMethodStackOffset = 0;
static constexpr Register kMethodRegisterArgument = RDI;
// The compare/jump sequence will generate about (1.5 * num_entries) instructions. The jump
// table version generates 7 instructions and num_entries literals. The compare/jump sequence
// therefore generates less code/data for a small num_entries.
static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;
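// Illustrative shape of the two lowerings (not the exact emitted code):
//   compare/jump:  cmpl $key_i, value; je case_i; ...   repeated per entry
//   jump table:    normalize and bounds-check the value, load the table address
//                  RIP-relatively, fetch the per-entry offset and do an indirect jmp.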

static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 };
static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 };

static constexpr int kC2ConditionMask = 0x400;
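// Note: 0x400 is bit 10 of the x87 FPU status word, i.e. the C2 condition flag,
// which signals an incomplete partial remainder after FPREM/FPREM1.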

static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
  // Custom calling convention: RAX serves as both input and output.
  RegisterSet caller_saves = RegisterSet::Empty();
  caller_saves.Add(Location::RegisterLocation(RAX));
  return caller_saves;
}

// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())->  // NOLINT
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize, x).Int32Value()
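// With these macros, a statement such as `__ jmp(GetExitLabel());` in the slow paths below
// emits through the X86_64Assembler owned by `codegen`, and QUICK_ENTRY_POINT(x) yields the
// Thread-relative offset of the quick entrypoint `x`.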

class NullCheckSlowPathX86_64 : public SlowPathCode {
 public:
  explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    if (instruction_->CanThrowIntoCatchBlock()) {
      // Live registers will be restored in the catch block if caught.
      SaveLiveRegisters(codegen, instruction_->GetLocations());
    }
    x86_64_codegen->InvokeRuntime(kQuickThrowNullPointer,
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
    CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
  }

  bool IsFatal() const override { return true; }

  const char* GetDescription() const override { return "NullCheckSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86_64);
};

class DivZeroCheckSlowPathX86_64 : public SlowPathCode {
 public:
  explicit DivZeroCheckSlowPathX86_64(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    x86_64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
  }

  bool IsFatal() const override { return true; }

  const char* GetDescription() const override { return "DivZeroCheckSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86_64);
};

class DivRemMinusOneSlowPathX86_64 : public SlowPathCode {
 public:
  DivRemMinusOneSlowPathX86_64(HInstruction* at, Register reg, DataType::Type type, bool is_div)
      : SlowPathCode(at), cpu_reg_(CpuRegister(reg)), type_(type), is_div_(is_div) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    __ Bind(GetEntryLabel());
    if (type_ == DataType::Type::kInt32) {
      if (is_div_) {
        __ negl(cpu_reg_);
      } else {
        __ xorl(cpu_reg_, cpu_reg_);
      }

    } else {
      DCHECK_EQ(DataType::Type::kInt64, type_);
      if (is_div_) {
        __ negq(cpu_reg_);
      } else {
        __ xorl(cpu_reg_, cpu_reg_);
      }
    }
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "DivRemMinusOneSlowPathX86_64"; }

 private:
  const CpuRegister cpu_reg_;
  const DataType::Type type_;
  const bool is_div_;
  DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86_64);
};

class SuspendCheckSlowPathX86_64 : public SlowPathCode {
 public:
  SuspendCheckSlowPathX86_64(HSuspendCheck* instruction, HBasicBlock* successor)
      : SlowPathCode(instruction), successor_(successor) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);  // Only saves full width XMM for SIMD.
    x86_64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickTestSuspend, void, void>();
    RestoreLiveRegisters(codegen, locations);  // Only restores full width XMM for SIMD.
    if (successor_ == nullptr) {
      __ jmp(GetReturnLabel());
    } else {
      __ jmp(x86_64_codegen->GetLabelOf(successor_));
    }
  }

  Label* GetReturnLabel() {
    DCHECK(successor_ == nullptr);
    return &return_label_;
  }

  HBasicBlock* GetSuccessor() const {
    return successor_;
  }

  const char* GetDescription() const override { return "SuspendCheckSlowPathX86_64"; }

 private:
  HBasicBlock* const successor_;
  Label return_label_;

  DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86_64);
};

class BoundsCheckSlowPathX86_64 : public SlowPathCode {
 public:
  explicit BoundsCheckSlowPathX86_64(HBoundsCheck* instruction)
      : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    if (instruction_->CanThrowIntoCatchBlock()) {
      // Live registers will be restored in the catch block if caught.
      SaveLiveRegisters(codegen, instruction_->GetLocations());
    }
    // Are we using an array length from memory?
    HInstruction* array_length = instruction_->InputAt(1);
    Location length_loc = locations->InAt(1);
    InvokeRuntimeCallingConvention calling_convention;
    if (array_length->IsArrayLength() && array_length->IsEmittedAtUseSite()) {
      // Load the array length into our temporary.
      HArrayLength* length = array_length->AsArrayLength();
      uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(length);
      Location array_loc = array_length->GetLocations()->InAt(0);
      Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
      length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
      // Check for conflicts with index.
      if (length_loc.Equals(locations->InAt(0))) {
        // We know we aren't using parameter 2.
        length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2));
      }
      __ movl(length_loc.AsRegister<CpuRegister>(), array_len);
      if (mirror::kUseStringCompression && length->IsStringLength()) {
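        // With string compression enabled, the count field is expected to pack the
        // compression flag into its low bit (see mirror::String), so the shift below
        // recovers the plain character count.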
        __ shrl(length_loc.AsRegister<CpuRegister>(), Immediate(1));
      }
    }

    // We're moving two locations to locations that could overlap, so we need a parallel
    // move resolver.
    codegen->EmitParallelMoves(
        locations->InAt(0),
        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
        DataType::Type::kInt32,
        length_loc,
        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
        DataType::Type::kInt32);
    QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
        ? kQuickThrowStringBounds
        : kQuickThrowArrayBounds;
    x86_64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
    CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
  }

  bool IsFatal() const override { return true; }

  const char* GetDescription() const override { return "BoundsCheckSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86_64);
};

class LoadClassSlowPathX86_64 : public SlowPathCode {
 public:
  LoadClassSlowPathX86_64(HLoadClass* cls, HInstruction* at)
      : SlowPathCode(at), cls_(cls) {
    DCHECK(at->IsLoadClass() || at->IsClinitCheck());
    DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    Location out = locations->Out();
    const uint32_t dex_pc = instruction_->GetDexPc();
    bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
    bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();

    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    // Custom calling convention: RAX serves as both input and output.
    if (must_resolve_type) {
      DCHECK(IsSameDexFile(cls_->GetDexFile(), x86_64_codegen->GetGraph()->GetDexFile()));
      dex::TypeIndex type_index = cls_->GetTypeIndex();
      __ movl(CpuRegister(RAX), Immediate(type_index.index_));
      if (cls_->NeedsAccessCheck()) {
        CheckEntrypointTypes<kQuickResolveTypeAndVerifyAccess, void*, uint32_t>();
        x86_64_codegen->InvokeRuntime(kQuickResolveTypeAndVerifyAccess, instruction_, dex_pc, this);
      } else {
        CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
        x86_64_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
      }
      // If we also must_do_clinit, the resolved type is now in the correct register.
    } else {
      DCHECK(must_do_clinit);
      Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
      x86_64_codegen->Move(Location::RegisterLocation(RAX), source);
    }
    if (must_do_clinit) {
      x86_64_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
      CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
    }

    // Move the class to the desired location.
    if (out.IsValid()) {
      DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      x86_64_codegen->Move(out, Location::RegisterLocation(RAX));
    }

    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "LoadClassSlowPathX86_64"; }

 private:
  // The class this slow path will load.
  HLoadClass* const cls_;

  DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86_64);
};

class LoadStringSlowPathX86_64 : public SlowPathCode {
 public:
  explicit LoadStringSlowPathX86_64(HLoadString* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));

    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
    // Custom calling convention: RAX serves as both input and output.
    __ movl(CpuRegister(RAX), Immediate(string_index.index_));
    x86_64_codegen->InvokeRuntime(kQuickResolveString,
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
    CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
    x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
    RestoreLiveRegisters(codegen, locations);

    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "LoadStringSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86_64);
};

class TypeCheckSlowPathX86_64 : public SlowPathCode {
 public:
  TypeCheckSlowPathX86_64(HInstruction* instruction, bool is_fatal)
      : SlowPathCode(instruction), is_fatal_(is_fatal) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    uint32_t dex_pc = instruction_->GetDexPc();
    DCHECK(instruction_->IsCheckCast()
           || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));

    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());

    if (kPoisonHeapReferences &&
        instruction_->IsCheckCast() &&
        instruction_->AsCheckCast()->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) {
      // First, unpoison the `cls` reference that was poisoned for direct memory comparison.
      __ UnpoisonHeapReference(locations->InAt(1).AsRegister<CpuRegister>());
    }

    if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
      SaveLiveRegisters(codegen, locations);
    }

    // We're moving two locations to locations that could overlap, so we need a parallel
    // move resolver.
    InvokeRuntimeCallingConvention calling_convention;
    codegen->EmitParallelMoves(locations->InAt(0),
                               Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
                               DataType::Type::kReference,
                               locations->InAt(1),
                               Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
                               DataType::Type::kReference);
    if (instruction_->IsInstanceOf()) {
      x86_64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
      CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
    } else {
      DCHECK(instruction_->IsCheckCast());
      x86_64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
      CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
    }

    if (!is_fatal_) {
      if (instruction_->IsInstanceOf()) {
        x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
      }

      RestoreLiveRegisters(codegen, locations);
      __ jmp(GetExitLabel());
    }
  }

  const char* GetDescription() const override { return "TypeCheckSlowPathX86_64"; }

  bool IsFatal() const override { return is_fatal_; }

 private:
  const bool is_fatal_;

  DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86_64);
};

class DeoptimizationSlowPathX86_64 : public SlowPathCode {
 public:
  explicit DeoptimizationSlowPathX86_64(HDeoptimize* instruction)
      : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    LocationSummary* locations = instruction_->GetLocations();
    SaveLiveRegisters(codegen, locations);
    InvokeRuntimeCallingConvention calling_convention;
    x86_64_codegen->Load32BitValue(
        CpuRegister(calling_convention.GetRegisterAt(0)),
        static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
    x86_64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
  }

  const char* GetDescription() const override { return "DeoptimizationSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64);
};

class ArraySetSlowPathX86_64 : public SlowPathCode {
 public:
  explicit ArraySetSlowPathX86_64(HInstruction* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
    parallel_move.AddMove(
        locations->InAt(0),
        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
        DataType::Type::kReference,
        nullptr);
    parallel_move.AddMove(
        locations->InAt(1),
        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
        DataType::Type::kInt32,
        nullptr);
    parallel_move.AddMove(
        locations->InAt(2),
        Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
        DataType::Type::kReference,
        nullptr);
    codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);

    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    x86_64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "ArraySetSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64);
};

// Slow path marking an object reference `ref` during a read
// barrier. The field `obj.field` in the object `obj` holding this
// reference does not get updated by this slow path after marking (see
// ReadBarrierMarkAndUpdateFieldSlowPathX86_64 below for that).
//
// This means that after the execution of this slow path, `ref` will
// always be up-to-date, but `obj.field` may not; i.e., after the
// flip, `ref` will be a to-space reference, but `obj.field` will
// probably still be a from-space reference (unless it gets updated by
// another thread, or if another thread installed another object
// reference (different from `ref`) in `obj.field`).
class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
 public:
  ReadBarrierMarkSlowPathX86_64(HInstruction* instruction,
                                Location ref,
                                bool unpoison_ref_before_marking)
      : SlowPathCode(instruction),
        ref_(ref),
        unpoison_ref_before_marking_(unpoison_ref_before_marking) {
    DCHECK(kEmitCompilerReadBarrier);
  }

  const char* GetDescription() const override { return "ReadBarrierMarkSlowPathX86_64"; }

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
    Register ref_reg = ref_cpu_reg.AsRegister();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
    DCHECK(instruction_->IsInstanceFieldGet() ||
           instruction_->IsPredicatedInstanceFieldGet() ||
           instruction_->IsStaticFieldGet() ||
           instruction_->IsArrayGet() ||
           instruction_->IsArraySet() ||
           instruction_->IsLoadClass() ||
           instruction_->IsLoadString() ||
           instruction_->IsInstanceOf() ||
           instruction_->IsCheckCast() ||
           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
           (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier marking slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    if (unpoison_ref_before_marking_) {
      // Object* ref = ref_addr->AsMirrorPtr()
      __ MaybeUnpoisonHeapReference(ref_cpu_reg);
    }
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    DCHECK_NE(ref_reg, RSP);
    DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
528 // "Compact" slow path, saving two moves.
529 //
530 // Instead of using the standard runtime calling convention (input
531 // and output in R0):
532 //
533 // RDI <- ref
534 // RAX <- ReadBarrierMark(RDI)
535 // ref <- RAX
536 //
537 // we just use rX (the register containing `ref`) as input and output
538 // of a dedicated entrypoint:
539 //
540 // rX <- ReadBarrierMarkRegX(rX)
541 //
    int32_t entry_point_offset =
        Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
    // This runtime call does not require a stack map.
    x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    __ jmp(GetExitLabel());
  }

 private:
  // The location (register) of the marked object reference.
  const Location ref_;
  // Should the reference in `ref_` be unpoisoned prior to marking it?
  const bool unpoison_ref_before_marking_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64);
};

// Slow path marking an object reference `ref` during a read barrier,
// and if needed, atomically updating the field `obj.field` in the
// object `obj` holding this reference after marking (contrary to
// ReadBarrierMarkSlowPathX86_64 above, which never tries to update
// `obj.field`).
//
// This means that after the execution of this slow path, both `ref`
// and `obj.field` will be up-to-date; i.e., after the flip, both will
// hold the same to-space reference (unless another thread installed
// another object reference (different from `ref`) in `obj.field`).
class ReadBarrierMarkAndUpdateFieldSlowPathX86_64 : public SlowPathCode {
 public:
  ReadBarrierMarkAndUpdateFieldSlowPathX86_64(HInstruction* instruction,
                                              Location ref,
                                              CpuRegister obj,
                                              const Address& field_addr,
                                              bool unpoison_ref_before_marking,
                                              CpuRegister temp1,
                                              CpuRegister temp2)
      : SlowPathCode(instruction),
        ref_(ref),
        obj_(obj),
        field_addr_(field_addr),
        unpoison_ref_before_marking_(unpoison_ref_before_marking),
        temp1_(temp1),
        temp2_(temp2) {
    DCHECK(kEmitCompilerReadBarrier);
  }

  const char* GetDescription() const override {
    return "ReadBarrierMarkAndUpdateFieldSlowPathX86_64";
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
    Register ref_reg = ref_cpu_reg.AsRegister();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
    // This slow path is only used by the UnsafeCASObject intrinsic.
    DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier marking and field updating slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);

    __ Bind(GetEntryLabel());
    if (unpoison_ref_before_marking_) {
      // Object* ref = ref_addr->AsMirrorPtr()
      __ MaybeUnpoisonHeapReference(ref_cpu_reg);
    }

    // Save the old (unpoisoned) reference.
    __ movl(temp1_, ref_cpu_reg);

    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    DCHECK_NE(ref_reg, RSP);
    DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
619 // "Compact" slow path, saving two moves.
620 //
621 // Instead of using the standard runtime calling convention (input
622 // and output in R0):
623 //
624 // RDI <- ref
625 // RAX <- ReadBarrierMark(RDI)
626 // ref <- RAX
627 //
628 // we just use rX (the register containing `ref`) as input and output
629 // of a dedicated entrypoint:
630 //
631 // rX <- ReadBarrierMarkRegX(rX)
632 //
    int32_t entry_point_offset =
        Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
    // This runtime call does not require a stack map.
    x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);

    // If the new reference is different from the old reference,
    // update the field in the holder (`*field_addr`).
    //
    // Note that this field could also hold a different object, if
    // another thread had concurrently changed it. In that case, the
    // LOCK CMPXCHGL instruction in the compare-and-set (CAS)
    // operation below would abort the CAS, leaving the field as-is.
    NearLabel done;
    __ cmpl(temp1_, ref_cpu_reg);
    __ j(kEqual, &done);

    // Update the holder's field atomically. This may fail if the mutator
    // updates it before us, but that's OK. This is achieved using a strong
    // compare-and-set (CAS) operation with relaxed memory synchronization
    // ordering, where the expected value is the old reference and the
    // desired value is the new reference. This operation is implemented
    // with a 32-bit LOCK CMPXCHGL instruction, which requires the expected
    // value (the old reference) to be in EAX. Save RAX beforehand, and move
    // the expected value (stored in `temp1_`) into EAX.
    __ movq(temp2_, CpuRegister(RAX));
    __ movl(CpuRegister(RAX), temp1_);

    // Convenience aliases.
    CpuRegister base = obj_;
    CpuRegister expected = CpuRegister(RAX);
    CpuRegister value = ref_cpu_reg;

    bool base_equals_value = (base.AsRegister() == value.AsRegister());
    Register value_reg = ref_reg;
    if (kPoisonHeapReferences) {
      if (base_equals_value) {
        // If `base` and `value` are the same register location, move
        // `value_reg` to a temporary register. This way, poisoning
        // `value_reg` won't invalidate `base`.
        value_reg = temp1_.AsRegister();
        __ movl(CpuRegister(value_reg), base);
      }

      // Check that the register allocator did not assign the location
      // of `expected` (RAX) to `value` nor to `base`, so that heap
      // poisoning (when enabled) works as intended below.
      // - If `value` were equal to `expected`, both references would
      //   be poisoned twice, meaning they would not be poisoned at
      //   all, as heap poisoning uses address negation.
      // - If `base` were equal to `expected`, poisoning `expected`
      //   would invalidate `base`.
      DCHECK_NE(value_reg, expected.AsRegister());
      DCHECK_NE(base.AsRegister(), expected.AsRegister());

      __ PoisonHeapReference(expected);
      __ PoisonHeapReference(CpuRegister(value_reg));
    }

    __ LockCmpxchgl(field_addr_, CpuRegister(value_reg));

    // If heap poisoning is enabled, we need to unpoison the values
    // that were poisoned earlier.
    if (kPoisonHeapReferences) {
      if (base_equals_value) {
        // `value_reg` has been moved to a temporary register, no need
        // to unpoison it.
      } else {
        __ UnpoisonHeapReference(CpuRegister(value_reg));
      }
      // No need to unpoison `expected` (RAX), as it will be overwritten below.
    }

    // Restore RAX.
    __ movq(CpuRegister(RAX), temp2_);

    __ Bind(&done);
    __ jmp(GetExitLabel());
  }

 private:
  // The location (register) of the marked object reference.
  const Location ref_;
  // The register containing the object holding the marked object reference field.
  const CpuRegister obj_;
  // The address of the marked reference field. The base of this address must be `obj_`.
  const Address field_addr_;

  // Should the reference in `ref_` be unpoisoned prior to marking it?
  const bool unpoison_ref_before_marking_;

  const CpuRegister temp1_;
  const CpuRegister temp2_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86_64);
};

// Slow path generating a read barrier for a heap reference.
class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
 public:
  ReadBarrierForHeapReferenceSlowPathX86_64(HInstruction* instruction,
                                            Location out,
                                            Location ref,
                                            Location obj,
                                            uint32_t offset,
                                            Location index)
      : SlowPathCode(instruction),
        out_(out),
        ref_(ref),
        obj_(obj),
        offset_(offset),
        index_(index) {
    DCHECK(kEmitCompilerReadBarrier);
    // If `obj` is equal to `out` or `ref`, it means the initial
    // object has been overwritten by (or after) the heap object
    // reference load to be instrumented, e.g.:
    //
    //   __ movl(out, Address(out, offset));
    //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
    //
    // In that case, we have lost the information about the original
    // object, and the emitted read barrier cannot work properly.
    DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
    DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    LocationSummary* locations = instruction_->GetLocations();
    CpuRegister reg_out = out_.AsRegister<CpuRegister>();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_;
    DCHECK(instruction_->IsInstanceFieldGet() ||
           instruction_->IsPredicatedInstanceFieldGet() ||
           instruction_->IsStaticFieldGet() ||
           instruction_->IsArrayGet() ||
           instruction_->IsInstanceOf() ||
           instruction_->IsCheckCast() ||
           (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier for heap reference slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    // We may have to change the index's value, but as `index_` is a
    // constant member (like other "inputs" of this slow path),
    // introduce a copy of it, `index`.
    Location index = index_;
    if (index_.IsValid()) {
      // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
      if (instruction_->IsArrayGet()) {
        // Compute real offset and store it in index_.
        Register index_reg = index_.AsRegister<CpuRegister>().AsRegister();
        DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
        if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
          // We are about to change the value of `index_reg` (see the
          // calls to art::x86_64::X86_64Assembler::shll and
          // art::x86_64::X86_64Assembler::AddImmediate below), but it
          // has not been saved by the previous call to
          // art::SlowPathCode::SaveLiveRegisters, as it is a
          // callee-save register --
          // art::SlowPathCode::SaveLiveRegisters does not consider
          // callee-save registers, as it has been designed with the
          // assumption that callee-save registers are supposed to be
          // handled by the called function. So, as a callee-save
          // register, `index_reg` _would_ eventually be saved onto
          // the stack, but it would be too late: we would have
          // changed its value earlier. Therefore, we manually save
          // it here into another freely available register,
          // `free_reg`, chosen of course among the caller-save
          // registers (as a callee-save `free_reg` register would
          // exhibit the same problem).
          //
          // Note we could have requested a temporary register from
          // the register allocator instead; but we prefer not to, as
          // this is a slow path, and we know we can find a
          // caller-save register that is available.
          Register free_reg = FindAvailableCallerSaveRegister(codegen).AsRegister();
          __ movl(CpuRegister(free_reg), CpuRegister(index_reg));
          index_reg = free_reg;
          index = Location::RegisterLocation(index_reg);
        } else {
          // The initial register stored in `index_` has already been
          // saved in the call to art::SlowPathCode::SaveLiveRegisters
          // (as it is not a callee-save register), so we can freely
          // use it.
        }
        // Shifting the index value contained in `index_reg` by the
        // scale factor (2) cannot overflow in practice, as the
        // runtime is unable to allocate object arrays with a size
        // larger than 2^26 - 1 (that is, 2^28 - 4 bytes).
        __ shll(CpuRegister(index_reg), Immediate(TIMES_4));
        static_assert(
            sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
            "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
        __ AddImmediate(CpuRegister(index_reg), Immediate(offset_));
      } else {
        // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
        // intrinsics, `index_` is not shifted by a scale factor of 2
        // (as in the case of ArrayGet), as it is actually an offset
        // to an object field within an object.
        DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
        DCHECK(instruction_->GetLocations()->Intrinsified());
        DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
               (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
            << instruction_->AsInvoke()->GetIntrinsic();
        DCHECK_EQ(offset_, 0U);
        DCHECK(index_.IsRegister());
      }
    }

    // We're moving two or three locations to locations that could
    // overlap, so we need a parallel move resolver.
    InvokeRuntimeCallingConvention calling_convention;
    HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
    parallel_move.AddMove(ref_,
                          Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
                          DataType::Type::kReference,
                          nullptr);
    parallel_move.AddMove(obj_,
                          Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
                          DataType::Type::kReference,
                          nullptr);
    if (index.IsValid()) {
      parallel_move.AddMove(index,
                            Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
                            DataType::Type::kInt32,
                            nullptr);
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
    } else {
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
      __ movl(CpuRegister(calling_convention.GetRegisterAt(2)), Immediate(offset_));
    }
    x86_64_codegen->InvokeRuntime(kQuickReadBarrierSlow,
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
    CheckEntrypointTypes<
        kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
    x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));

    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override {
    return "ReadBarrierForHeapReferenceSlowPathX86_64";
  }

 private:
  CpuRegister FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
    size_t ref = static_cast<int>(ref_.AsRegister<CpuRegister>().AsRegister());
    size_t obj = static_cast<int>(obj_.AsRegister<CpuRegister>().AsRegister());
    for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
      if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
        return static_cast<CpuRegister>(i);
      }
    }
    // We shall never fail to find a free caller-save register, as
    // there are more than two core caller-save registers on x86-64
    // (meaning it is possible to find one which is different from
    // `ref` and `obj`).
    DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
    LOG(FATAL) << "Could not find a free caller-save register";
    UNREACHABLE();
  }

  const Location out_;
  const Location ref_;
  const Location obj_;
  const uint32_t offset_;
  // An additional location containing an index to an array.
  // Only used for HArrayGet and the UnsafeGetObject &
  // UnsafeGetObjectVolatile intrinsics.
  const Location index_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86_64);
};

// Slow path generating a read barrier for a GC root.
class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode {
 public:
  ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root)
      : SlowPathCode(instruction), out_(out), root_(root) {
    DCHECK(kEmitCompilerReadBarrier);
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
        << "Unexpected instruction in read barrier for GC root slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
    x86_64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
    CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
    x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));

    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "ReadBarrierForRootSlowPathX86_64"; }

 private:
  const Location out_;
  const Location root_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86_64);
};

#undef __
// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86_64Assembler*>(GetAssembler())->  // NOLINT

inline Condition X86_64IntegerCondition(IfCondition cond) {
  switch (cond) {
    case kCondEQ: return kEqual;
    case kCondNE: return kNotEqual;
    case kCondLT: return kLess;
    case kCondLE: return kLessEqual;
    case kCondGT: return kGreater;
    case kCondGE: return kGreaterEqual;
    case kCondB: return kBelow;
    case kCondBE: return kBelowEqual;
    case kCondA: return kAbove;
    case kCondAE: return kAboveEqual;
  }
  LOG(FATAL) << "Unreachable";
  UNREACHABLE();
}

// Maps FP condition to x86_64 name.
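// Note: FP compares are assumed to be emitted with ucomiss/ucomisd, which set ZF/PF/CF
// like an unsigned integer compare (and set all three for unordered operands), so
// "less than" maps to kBelow rather than kLess, and so on.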
inline Condition X86_64FPCondition(IfCondition cond) {
  switch (cond) {
    case kCondEQ: return kEqual;
    case kCondNE: return kNotEqual;
    case kCondLT: return kBelow;
    case kCondLE: return kBelowEqual;
    case kCondGT: return kAbove;
    case kCondGE: return kAboveEqual;
    default: break;  // should not happen
  }
  LOG(FATAL) << "Unreachable";
  UNREACHABLE();
}

void CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(LocationSummary* locations) {
  // We have to ensure that the native code we call directly (such as @CriticalNative
  // or some intrinsic helpers, say Math.sin()) doesn't clobber the XMM registers
  // which are non-volatile for ART, but volatile for Native calls. This will ensure
  // that they are saved in the prologue and properly restored.
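  // (In the System V AMD64 native ABI all XMM registers are caller-save, whereas ART
  // treats XMM12-XMM15 as callee-save; see kFpuCalleeSaves above.)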
  for (FloatRegister fp_reg : non_volatile_xmm_regs) {
    locations->AddTemp(Location::FpuRegisterLocation(fp_reg));
  }
}

HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStaticOrDirectDispatch(
    const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
    ArtMethod* method ATTRIBUTE_UNUSED) {
  return desired_dispatch_info;
}

void CodeGeneratorX86_64::LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke) {
  switch (load_kind) {
    case MethodLoadKind::kBootImageLinkTimePcRelative:
      DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
      __ leal(temp.AsRegister<CpuRegister>(),
              Address::Absolute(kPlaceholder32BitOffset, /* no_rip= */ false));
      RecordBootImageMethodPatch(invoke);
      break;
    case MethodLoadKind::kBootImageRelRo: {
      // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load.
      __ movl(temp.AsRegister<CpuRegister>(),
              Address::Absolute(kPlaceholder32BitOffset, /* no_rip= */ false));
      RecordBootImageRelRoPatch(GetBootImageOffset(invoke));
      break;
    }
    case MethodLoadKind::kBssEntry: {
      __ movq(temp.AsRegister<CpuRegister>(),
              Address::Absolute(kPlaceholder32BitOffset, /* no_rip= */ false));
      RecordMethodBssEntryPatch(invoke);
      // No need for memory fence, thanks to the x86-64 memory model.
      break;
    }
    case MethodLoadKind::kJitDirectAddress: {
      Load64BitValue(temp.AsRegister<CpuRegister>(),
                     reinterpret_cast<int64_t>(invoke->GetResolvedMethod()));
      break;
    }
    case MethodLoadKind::kRuntimeCall: {
      // Test situation, don't do anything.
      break;
    }
    default: {
      LOG(FATAL) << "Load kind should have already been handled " << load_kind;
      UNREACHABLE();
    }
  }
}

void CodeGeneratorX86_64::GenerateStaticOrDirectCall(
    HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
  // All registers are assumed to be correctly set up.

  Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
  switch (invoke->GetMethodLoadKind()) {
    case MethodLoadKind::kStringInit: {
      // temp = thread->string_init_entrypoint
      uint32_t offset =
          GetThreadOffset<kX86_64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
      __ gs()->movq(temp.AsRegister<CpuRegister>(), Address::Absolute(offset, /* no_rip= */ true));
      break;
    }
    case MethodLoadKind::kRecursive: {
      callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex());
      break;
    }
    case MethodLoadKind::kRuntimeCall: {
      GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
      return;  // No code pointer retrieval; the runtime performs the call directly.
    }
    case MethodLoadKind::kBootImageLinkTimePcRelative:
      // For kCallCriticalNative we skip loading the method and do the call directly.
      if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
        break;
      }
      FALLTHROUGH_INTENDED;
    default: {
      LoadMethod(invoke->GetMethodLoadKind(), temp, invoke);
      break;
    }
  }

  switch (invoke->GetCodePtrLocation()) {
    case CodePtrLocation::kCallSelf:
      __ call(&frame_entry_label_);
      RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
      break;
    case CodePtrLocation::kCallCriticalNative: {
      size_t out_frame_size =
          PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorX86_64,
                                    kNativeStackAlignment,
                                    GetCriticalNativeDirectCallFrameSize>(invoke);
      if (invoke->GetMethodLoadKind() == MethodLoadKind::kBootImageLinkTimePcRelative) {
        DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
        __ call(Address::Absolute(kPlaceholder32BitOffset, /* no_rip= */ false));
        RecordBootImageJniEntrypointPatch(invoke);
      } else {
        // (callee_method + offset_of_jni_entry_point)()
        __ call(Address(callee_method.AsRegister<CpuRegister>(),
                        ArtMethod::EntryPointFromJniOffset(kX86_64PointerSize).SizeValue()));
      }
      RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
      // Zero-/sign-extend the result when needed due to native and managed ABI mismatch.
      switch (invoke->GetType()) {
        case DataType::Type::kBool:
          __ movzxb(CpuRegister(RAX), CpuRegister(RAX));
          break;
        case DataType::Type::kInt8:
          __ movsxb(CpuRegister(RAX), CpuRegister(RAX));
          break;
        case DataType::Type::kUint16:
          __ movzxw(CpuRegister(RAX), CpuRegister(RAX));
          break;
        case DataType::Type::kInt16:
          __ movsxw(CpuRegister(RAX), CpuRegister(RAX));
          break;
        case DataType::Type::kInt32:
        case DataType::Type::kInt64:
        case DataType::Type::kFloat32:
        case DataType::Type::kFloat64:
        case DataType::Type::kVoid:
          break;
        default:
          DCHECK(false) << invoke->GetType();
          break;
      }
      if (out_frame_size != 0u) {
        DecreaseFrame(out_frame_size);
      }
      break;
    }
    case CodePtrLocation::kCallArtMethod:
      // (callee_method + offset_of_quick_compiled_code)()
      __ call(Address(callee_method.AsRegister<CpuRegister>(),
                      ArtMethod::EntryPointFromQuickCompiledCodeOffset(
                          kX86_64PointerSize).SizeValue()));
      RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
      break;
  }

  DCHECK(!IsLeafMethod());
}

void CodeGeneratorX86_64::GenerateVirtualCall(
    HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
  CpuRegister temp = temp_in.AsRegister<CpuRegister>();
  size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
      invoke->GetVTableIndex(), kX86_64PointerSize).SizeValue();

  // Use the calling convention instead of the location of the receiver, as
  // intrinsics may have put the receiver in a different register. In the intrinsics
  // slow path, the arguments have been moved to the right place, so here we are
  // guaranteed that the receiver is the first register of the calling convention.
  InvokeDexCallingConvention calling_convention;
  Register receiver = calling_convention.GetRegisterAt(0);

  size_t class_offset = mirror::Object::ClassOffset().SizeValue();
  // /* HeapReference<Class> */ temp = receiver->klass_
  __ movl(temp, Address(CpuRegister(receiver), class_offset));
  MaybeRecordImplicitNullCheck(invoke);
  // Instead of simply (possibly) unpoisoning `temp` here, we should
  // emit a read barrier for the previous class reference load.
  // However this is not required in practice, as this is an
  // intermediate/temporary reference and because the current
  // concurrent copying collector keeps the from-space memory
  // intact/accessible until the end of the marking phase (the
  // concurrent copying collector may not do so in the future).
  __ MaybeUnpoisonHeapReference(temp);

  MaybeGenerateInlineCacheCheck(invoke, temp);

  // temp = temp->GetMethodAt(method_offset);
  __ movq(temp, Address(temp, method_offset));
  // call temp->GetEntryPoint();
  __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(
      kX86_64PointerSize).SizeValue()));
  RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
}

void CodeGeneratorX86_64::RecordBootImageIntrinsicPatch(uint32_t intrinsic_data) {
  boot_image_other_patches_.emplace_back(/* target_dex_file= */ nullptr, intrinsic_data);
  __ Bind(&boot_image_other_patches_.back().label);
}

void CodeGeneratorX86_64::RecordBootImageRelRoPatch(uint32_t boot_image_offset) {
  boot_image_other_patches_.emplace_back(/* target_dex_file= */ nullptr, boot_image_offset);
  __ Bind(&boot_image_other_patches_.back().label);
}

void CodeGeneratorX86_64::RecordBootImageMethodPatch(HInvoke* invoke) {
  boot_image_method_patches_.emplace_back(invoke->GetResolvedMethodReference().dex_file,
                                          invoke->GetResolvedMethodReference().index);
  __ Bind(&boot_image_method_patches_.back().label);
}

void CodeGeneratorX86_64::RecordMethodBssEntryPatch(HInvoke* invoke) {
  DCHECK(IsSameDexFile(GetGraph()->GetDexFile(), *invoke->GetMethodReference().dex_file));
  method_bss_entry_patches_.emplace_back(invoke->GetMethodReference().dex_file,
                                         invoke->GetMethodReference().index);
  __ Bind(&method_bss_entry_patches_.back().label);
}

void CodeGeneratorX86_64::RecordBootImageTypePatch(HLoadClass* load_class) {
  boot_image_type_patches_.emplace_back(
      &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
  __ Bind(&boot_image_type_patches_.back().label);
}

Label* CodeGeneratorX86_64::NewTypeBssEntryPatch(HLoadClass* load_class) {
  ArenaDeque<PatchInfo<Label>>* patches = nullptr;
  switch (load_class->GetLoadKind()) {
    case HLoadClass::LoadKind::kBssEntry:
      patches = &type_bss_entry_patches_;
      break;
    case HLoadClass::LoadKind::kBssEntryPublic:
      patches = &public_type_bss_entry_patches_;
      break;
    case HLoadClass::LoadKind::kBssEntryPackage:
      patches = &package_type_bss_entry_patches_;
      break;
    default:
      LOG(FATAL) << "Unexpected load kind: " << load_class->GetLoadKind();
      UNREACHABLE();
  }
  patches->emplace_back(&load_class->GetDexFile(), load_class->GetTypeIndex().index_);
  return &patches->back().label;
}

void CodeGeneratorX86_64::RecordBootImageStringPatch(HLoadString* load_string) {
  boot_image_string_patches_.emplace_back(
      &load_string->GetDexFile(), load_string->GetStringIndex().index_);
  __ Bind(&boot_image_string_patches_.back().label);
}

Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) {
  string_bss_entry_patches_.emplace_back(
      &load_string->GetDexFile(), load_string->GetStringIndex().index_);
  return &string_bss_entry_patches_.back().label;
}

void CodeGeneratorX86_64::RecordBootImageJniEntrypointPatch(HInvokeStaticOrDirect* invoke) {
  boot_image_jni_entrypoint_patches_.emplace_back(invoke->GetResolvedMethodReference().dex_file,
                                                  invoke->GetResolvedMethodReference().index);
  __ Bind(&boot_image_jni_entrypoint_patches_.back().label);
}

void CodeGeneratorX86_64::LoadBootImageAddress(CpuRegister reg, uint32_t boot_image_reference) {
  if (GetCompilerOptions().IsBootImage()) {
    __ leal(reg,
            Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
    RecordBootImageIntrinsicPatch(boot_image_reference);
  } else if (GetCompilerOptions().GetCompilePic()) {
    __ movl(reg,
            Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
    RecordBootImageRelRoPatch(boot_image_reference);
  } else {
    DCHECK(GetCompilerOptions().IsJitCompiler());
    gc::Heap* heap = Runtime::Current()->GetHeap();
    DCHECK(!heap->GetBootImageSpaces().empty());
    const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference;
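    // A 32-bit immediate suffices here because the boot image is mapped in the low 4GiB
    // (see the kBootImageRelRo note above); dchecked_integral_cast enforces this.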
    __ movl(reg, Immediate(dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(address))));
  }
}

void CodeGeneratorX86_64::LoadIntrinsicDeclaringClass(CpuRegister reg, HInvoke* invoke) {
  DCHECK_NE(invoke->GetIntrinsic(), Intrinsics::kNone);
  if (GetCompilerOptions().IsBootImage()) {
    // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
    __ leal(reg,
            Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
    MethodReference target_method = invoke->GetResolvedMethodReference();
    dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
    boot_image_type_patches_.emplace_back(target_method.dex_file, type_idx.index_);
    __ Bind(&boot_image_type_patches_.back().label);
  } else {
    uint32_t boot_image_offset = GetBootImageOffsetOfIntrinsicDeclaringClass(invoke);
    LoadBootImageAddress(reg, boot_image_offset);
  }
}

// The label points to the end of the "movl" or another instruction but the literal offset
// for method patch needs to point to the embedded constant which occupies the last 4 bytes.
constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
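// For example, for a RIP-relative `movl reg, [rip + placeholder]` the label is bound right
// after the instruction and the 4-byte displacement is its last 4 bytes, hence the -4
// adjustment below (illustrative; the same holds for the other patched instructions).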
1279
1280 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
EmitPcRelativeLinkerPatches(const ArenaDeque<PatchInfo<Label>> & infos,ArenaVector<linker::LinkerPatch> * linker_patches)1281 inline void CodeGeneratorX86_64::EmitPcRelativeLinkerPatches(
1282 const ArenaDeque<PatchInfo<Label>>& infos,
1283 ArenaVector<linker::LinkerPatch>* linker_patches) {
1284 for (const PatchInfo<Label>& info : infos) {
1285 uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
1286 linker_patches->push_back(
1287 Factory(literal_offset, info.target_dex_file, info.label.Position(), info.offset_or_index));
1288 }
1289 }
1290
1291 template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
1292 linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
1293 const DexFile* target_dex_file,
1294 uint32_t pc_insn_offset,
1295 uint32_t boot_image_offset) {
1296 DCHECK(target_dex_file == nullptr); // Unused for these patches, should be null.
1297 return Factory(literal_offset, pc_insn_offset, boot_image_offset);
1298 }
1299
1300 void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
1301 DCHECK(linker_patches->empty());
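  // Reserve room for all recorded patches up front, then emit them grouped by patch kind;
  // the DCHECK_EQ at the end verifies that exactly the reserved number was emitted.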
1302 size_t size =
1303 boot_image_method_patches_.size() +
1304 method_bss_entry_patches_.size() +
1305 boot_image_type_patches_.size() +
1306 type_bss_entry_patches_.size() +
1307 public_type_bss_entry_patches_.size() +
1308 package_type_bss_entry_patches_.size() +
1309 boot_image_string_patches_.size() +
1310 string_bss_entry_patches_.size() +
1311 boot_image_jni_entrypoint_patches_.size() +
1312 boot_image_other_patches_.size();
1313 linker_patches->reserve(size);
1314 if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) {
1315 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
1316 boot_image_method_patches_, linker_patches);
1317 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
1318 boot_image_type_patches_, linker_patches);
1319 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
1320 boot_image_string_patches_, linker_patches);
1321 } else {
1322 DCHECK(boot_image_method_patches_.empty());
1323 DCHECK(boot_image_type_patches_.empty());
1324 DCHECK(boot_image_string_patches_.empty());
1325 }
1326 if (GetCompilerOptions().IsBootImage()) {
1327 EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
1328 boot_image_other_patches_, linker_patches);
1329 } else {
1330 EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>(
1331 boot_image_other_patches_, linker_patches);
1332 }
1333 EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
1334 method_bss_entry_patches_, linker_patches);
1335 EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
1336 type_bss_entry_patches_, linker_patches);
1337 EmitPcRelativeLinkerPatches<linker::LinkerPatch::PublicTypeBssEntryPatch>(
1338 public_type_bss_entry_patches_, linker_patches);
1339 EmitPcRelativeLinkerPatches<linker::LinkerPatch::PackageTypeBssEntryPatch>(
1340 package_type_bss_entry_patches_, linker_patches);
1341 EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
1342 string_bss_entry_patches_, linker_patches);
1343 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeJniEntrypointPatch>(
1344 boot_image_jni_entrypoint_patches_, linker_patches);
1345 DCHECK_EQ(size, linker_patches->size());
1346 }
1347
1348 void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const {
1349 stream << Register(reg);
1350 }
1351
1352 void CodeGeneratorX86_64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
1353 stream << FloatRegister(reg);
1354 }
1355
1356 const X86_64InstructionSetFeatures& CodeGeneratorX86_64::GetInstructionSetFeatures() const {
1357 return *GetCompilerOptions().GetInstructionSetFeatures()->AsX86_64InstructionSetFeatures();
1358 }
1359
1360 size_t CodeGeneratorX86_64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
1361 __ movq(Address(CpuRegister(RSP), stack_index), CpuRegister(reg_id));
1362 return kX86_64WordSize;
1363 }
1364
1365 size_t CodeGeneratorX86_64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
1366 __ movq(CpuRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1367 return kX86_64WordSize;
1368 }
1369
1370 size_t CodeGeneratorX86_64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1371 if (GetGraph()->HasSIMD()) {
1372 __ movups(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
1373 } else {
1374 __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
1375 }
1376 return GetSlowPathFPWidth();
1377 }
1378
1379 size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1380 if (GetGraph()->HasSIMD()) {
1381 __ movups(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1382 } else {
1383 __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1384 }
1385 return GetSlowPathFPWidth();
1386 }
1387
1388 void CodeGeneratorX86_64::InvokeRuntime(QuickEntrypointEnum entrypoint,
1389 HInstruction* instruction,
1390 uint32_t dex_pc,
1391 SlowPathCode* slow_path) {
1392 ValidateInvokeRuntime(entrypoint, instruction, slow_path);
1393 GenerateInvokeRuntime(GetThreadOffset<kX86_64PointerSize>(entrypoint).Int32Value());
1394 if (EntrypointRequiresStackMap(entrypoint)) {
1395 RecordPcInfo(instruction, dex_pc, slow_path);
1396 }
1397 }
1398
1399 void CodeGeneratorX86_64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
1400 HInstruction* instruction,
1401 SlowPathCode* slow_path) {
1402 ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
1403 GenerateInvokeRuntime(entry_point_offset);
1404 }
1405
1406 void CodeGeneratorX86_64::GenerateInvokeRuntime(int32_t entry_point_offset) {
1407 __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip= */ true));
1408 }
1409
1410 static constexpr int kNumberOfCpuRegisterPairs = 0;
1411 // Use a fake return address register to mimic Quick.
1412 static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1);
1413 CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
1414 const CompilerOptions& compiler_options,
1415 OptimizingCompilerStats* stats)
1416 : CodeGenerator(graph,
1417 kNumberOfCpuRegisters,
1418 kNumberOfFloatRegisters,
1419 kNumberOfCpuRegisterPairs,
1420 ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
1421 arraysize(kCoreCalleeSaves))
1422 | (1 << kFakeReturnRegister),
1423 ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves),
1424 arraysize(kFpuCalleeSaves)),
1425 compiler_options,
1426 stats),
1427 block_labels_(nullptr),
1428 location_builder_(graph, this),
1429 instruction_visitor_(graph, this),
1430 move_resolver_(graph->GetAllocator(), this),
1431 assembler_(graph->GetAllocator()),
1432 constant_area_start_(0),
1433 boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1434 method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1435 boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1436 type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1437 public_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1438 package_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1439 boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1440 string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1441 boot_image_jni_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1442 boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1443 jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1444 jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1445 fixups_to_jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
1446 AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
1447 }
1448
1449 InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph,
1450 CodeGeneratorX86_64* codegen)
1451 : InstructionCodeGenerator(graph, codegen),
1452 assembler_(codegen->GetAssembler()),
1453 codegen_(codegen) {}
1454
1455 void CodeGeneratorX86_64::SetupBlockedRegisters() const {
1456 // Stack register is always reserved.
1457 blocked_core_registers_[RSP] = true;
1458
1459 // Block the register used as TMP.
1460 blocked_core_registers_[TMP] = true;
1461 }
1462
1463 static dwarf::Reg DWARFReg(Register reg) {
1464 return dwarf::Reg::X86_64Core(static_cast<int>(reg));
1465 }
1466
1467 static dwarf::Reg DWARFReg(FloatRegister reg) {
1468 return dwarf::Reg::X86_64Fp(static_cast<int>(reg));
1469 }
1470
1471 void CodeGeneratorX86_64::MaybeIncrementHotness(bool is_frame_entry) {
1472 if (GetCompilerOptions().CountHotnessInCompiledCode()) {
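    // Bump the method's hotness counter, saturating at ArtMethod::MaxCounter() so the
    // 16-bit counter never wraps around.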
1473 NearLabel overflow;
1474 Register method = kMethodRegisterArgument;
1475 if (!is_frame_entry) {
1476 CHECK(RequiresCurrentMethod());
1477 method = TMP;
1478 __ movq(CpuRegister(method), Address(CpuRegister(RSP), kCurrentMethodStackOffset));
1479 }
1480 __ cmpw(Address(CpuRegister(method), ArtMethod::HotnessCountOffset().Int32Value()),
1481 Immediate(ArtMethod::MaxCounter()));
1482 __ j(kEqual, &overflow);
1483 __ addw(Address(CpuRegister(method), ArtMethod::HotnessCountOffset().Int32Value()),
1484 Immediate(1));
1485 __ Bind(&overflow);
1486 }
1487
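  // For baseline-compiled code running under the JIT, also bump the ProfilingInfo baseline
  // hotness counter; when the masked counter reaches zero, call the kQuickCompileOptimized
  // entrypoint to request optimized (re)compilation of this method.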
1488 if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
1489 ScopedProfilingInfoUse spiu(
1490 Runtime::Current()->GetJit(), GetGraph()->GetArtMethod(), Thread::Current());
1491 ProfilingInfo* info = spiu.GetProfilingInfo();
1492 if (info != nullptr) {
1493 uint64_t address = reinterpret_cast64<uint64_t>(info);
1494 NearLabel done;
1495 __ movq(CpuRegister(TMP), Immediate(address));
1496 __ addw(Address(CpuRegister(TMP), ProfilingInfo::BaselineHotnessCountOffset().Int32Value()),
1497 Immediate(1));
1498 __ andw(Address(CpuRegister(TMP), ProfilingInfo::BaselineHotnessCountOffset().Int32Value()),
1499 Immediate(interpreter::kTieredHotnessMask));
1500 __ j(kNotZero, &done);
1501 if (HasEmptyFrame()) {
1502 CHECK(is_frame_entry);
1503 // Push RDI to keep the frame aligned; the stub also expects the method on the stack.
1504 __ pushq(CpuRegister(RDI));
1505 __ cfi().AdjustCFAOffset(kX86_64WordSize);
1506 __ cfi().RelOffset(DWARFReg(RDI), 0);
1507 } else if (!RequiresCurrentMethod()) {
1508 CHECK(is_frame_entry);
1509 __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset), CpuRegister(RDI));
1510 }
1511 GenerateInvokeRuntime(
1512 GetThreadOffset<kX86_64PointerSize>(kQuickCompileOptimized).Int32Value());
1513 if (HasEmptyFrame()) {
1514 __ popq(CpuRegister(RDI));
1515 __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize));
1516 __ cfi().Restore(DWARFReg(RDI));
1517 }
1518 __ Bind(&done);
1519 }
1520 }
1521 }
1522
1523 void CodeGeneratorX86_64::GenerateFrameEntry() {
1524 __ cfi().SetCurrentCFAOffset(kX86_64WordSize); // return address
1525 __ Bind(&frame_entry_label_);
1526 bool skip_overflow_check = IsLeafMethod()
1527 && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
1528 DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
1529
1530
1531 if (!skip_overflow_check) {
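    // Implicit stack overflow check: probe an address `reserved_bytes` below RSP. If the
    // stack is exhausted, this load faults, and the PC info recorded just below lets the
    // runtime turn the fault into a StackOverflowError at this location.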
1532 size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86_64);
1533 __ testq(CpuRegister(RAX), Address(CpuRegister(RSP), -static_cast<int32_t>(reserved_bytes)));
1534 RecordPcInfo(nullptr, 0);
1535 }
1536
1537 if (!HasEmptyFrame()) {
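    // Spill callee-save core registers with pushq (tracked via CFI), make a single RSP
    // adjustment for the rest of the frame, then store callee-save XMM registers into
    // their spill slots.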
1538 for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
1539 Register reg = kCoreCalleeSaves[i];
1540 if (allocated_registers_.ContainsCoreRegister(reg)) {
1541 __ pushq(CpuRegister(reg));
1542 __ cfi().AdjustCFAOffset(kX86_64WordSize);
1543 __ cfi().RelOffset(DWARFReg(reg), 0);
1544 }
1545 }
1546
1547 int adjust = GetFrameSize() - GetCoreSpillSize();
1548 IncreaseFrame(adjust);
1549 uint32_t xmm_spill_location = GetFpuSpillStart();
1550 size_t xmm_spill_slot_size = GetCalleePreservedFPWidth();
1551
1552 for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
1553 if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
1554 int offset = xmm_spill_location + (xmm_spill_slot_size * i);
1555 __ movsd(Address(CpuRegister(RSP), offset), XmmRegister(kFpuCalleeSaves[i]));
1556 __ cfi().RelOffset(DWARFReg(kFpuCalleeSaves[i]), offset);
1557 }
1558 }
1559
1560 // Save the current method if we need it. Note that we do not
1561 // do this in HCurrentMethod, as the instruction might have been removed
1562 // in the SSA graph.
1563 if (RequiresCurrentMethod()) {
1564 CHECK(!HasEmptyFrame());
1565 __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset),
1566 CpuRegister(kMethodRegisterArgument));
1567 }
1568
1569 if (GetGraph()->HasShouldDeoptimizeFlag()) {
1570 CHECK(!HasEmptyFrame());
1571 // Initialize should_deoptimize flag to 0.
1572 __ movl(Address(CpuRegister(RSP), GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
1573 }
1574 }
1575
1576 MaybeIncrementHotness(/* is_frame_entry= */ true);
1577 }
1578
1579 void CodeGeneratorX86_64::GenerateFrameExit() {
1580 __ cfi().RememberState();
1581 if (!HasEmptyFrame()) {
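    // Restore callee-save XMM registers from their spill slots, release the frame body with
    // a single RSP adjustment, then pop the callee-save core registers in reverse order of
    // the entry-time pushes.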
1582 uint32_t xmm_spill_location = GetFpuSpillStart();
1583 size_t xmm_spill_slot_size = GetCalleePreservedFPWidth();
1584 for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
1585 if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
1586 int offset = xmm_spill_location + (xmm_spill_slot_size * i);
1587 __ movsd(XmmRegister(kFpuCalleeSaves[i]), Address(CpuRegister(RSP), offset));
1588 __ cfi().Restore(DWARFReg(kFpuCalleeSaves[i]));
1589 }
1590 }
1591
1592 int adjust = GetFrameSize() - GetCoreSpillSize();
1593 DecreaseFrame(adjust);
1594
1595 for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
1596 Register reg = kCoreCalleeSaves[i];
1597 if (allocated_registers_.ContainsCoreRegister(reg)) {
1598 __ popq(CpuRegister(reg));
1599 __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize));
1600 __ cfi().Restore(DWARFReg(reg));
1601 }
1602 }
1603 }
1604 __ ret();
1605 __ cfi().RestoreState();
1606 __ cfi().DefCFAOffset(GetFrameSize());
1607 }
1608
1609 void CodeGeneratorX86_64::Bind(HBasicBlock* block) {
1610 __ Bind(GetLabelOf(block));
1611 }
1612
1613 void CodeGeneratorX86_64::Move(Location destination, Location source) {
1614 if (source.Equals(destination)) {
1615 return;
1616 }
1617 if (destination.IsRegister()) {
1618 CpuRegister dest = destination.AsRegister<CpuRegister>();
1619 if (source.IsRegister()) {
1620 __ movq(dest, source.AsRegister<CpuRegister>());
1621 } else if (source.IsFpuRegister()) {
1622 __ movd(dest, source.AsFpuRegister<XmmRegister>());
1623 } else if (source.IsStackSlot()) {
1624 __ movl(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1625 } else if (source.IsConstant()) {
1626 HConstant* constant = source.GetConstant();
1627 if (constant->IsLongConstant()) {
1628 Load64BitValue(dest, constant->AsLongConstant()->GetValue());
1629 } else {
1630 Load32BitValue(dest, GetInt32ValueOf(constant));
1631 }
1632 } else {
1633 DCHECK(source.IsDoubleStackSlot());
1634 __ movq(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1635 }
1636 } else if (destination.IsFpuRegister()) {
1637 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
1638 if (source.IsRegister()) {
1639 __ movd(dest, source.AsRegister<CpuRegister>());
1640 } else if (source.IsFpuRegister()) {
1641 __ movaps(dest, source.AsFpuRegister<XmmRegister>());
1642 } else if (source.IsConstant()) {
1643 HConstant* constant = source.GetConstant();
1644 int64_t value = CodeGenerator::GetInt64ValueOf(constant);
1645 if (constant->IsFloatConstant()) {
1646 Load32BitValue(dest, static_cast<int32_t>(value));
1647 } else {
1648 Load64BitValue(dest, value);
1649 }
1650 } else if (source.IsStackSlot()) {
1651 __ movss(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1652 } else {
1653 DCHECK(source.IsDoubleStackSlot());
1654 __ movsd(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1655 }
1656 } else if (destination.IsStackSlot()) {
1657 if (source.IsRegister()) {
1658 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
1659 source.AsRegister<CpuRegister>());
1660 } else if (source.IsFpuRegister()) {
1661 __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
1662 source.AsFpuRegister<XmmRegister>());
1663 } else if (source.IsConstant()) {
1664 HConstant* constant = source.GetConstant();
1665 int32_t value = GetInt32ValueOf(constant);
1666 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
1667 } else {
1668 DCHECK(source.IsStackSlot()) << source;
1669 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
1670 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
1671 }
1672 } else {
1673 DCHECK(destination.IsDoubleStackSlot());
1674 if (source.IsRegister()) {
1675 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
1676 source.AsRegister<CpuRegister>());
1677 } else if (source.IsFpuRegister()) {
1678 __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
1679 source.AsFpuRegister<XmmRegister>());
1680 } else if (source.IsConstant()) {
1681 HConstant* constant = source.GetConstant();
1682 DCHECK(constant->IsLongConstant() || constant->IsDoubleConstant());
1683 int64_t value = GetInt64ValueOf(constant);
1684 Store64BitValueToStack(destination, value);
1685 } else {
1686 DCHECK(source.IsDoubleStackSlot());
1687 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
1688 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
1689 }
1690 }
1691 }
1692
1693 void CodeGeneratorX86_64::MoveConstant(Location location, int32_t value) {
1694 DCHECK(location.IsRegister());
1695 Load64BitValue(location.AsRegister<CpuRegister>(), static_cast<int64_t>(value));
1696 }
1697
1698 void CodeGeneratorX86_64::MoveLocation(
1699 Location dst, Location src, DataType::Type dst_type ATTRIBUTE_UNUSED) {
1700 Move(dst, src);
1701 }
1702
1703 void CodeGeneratorX86_64::AddLocationAsTemp(Location location, LocationSummary* locations) {
1704 if (location.IsRegister()) {
1705 locations->AddTemp(location);
1706 } else {
1707 UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
1708 }
1709 }
1710
1711 void InstructionCodeGeneratorX86_64::HandleGoto(HInstruction* got, HBasicBlock* successor) {
1712 if (successor->IsExitBlock()) {
1713 DCHECK(got->GetPrevious()->AlwaysThrows());
1714 return; // no code needed
1715 }
1716
1717 HBasicBlock* block = got->GetBlock();
1718 HInstruction* previous = got->GetPrevious();
1719
1720 HLoopInformation* info = block->GetLoopInformation();
1721 if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
1722 codegen_->MaybeIncrementHotness(/* is_frame_entry= */ false);
1723 GenerateSuspendCheck(info->GetSuspendCheck(), successor);
1724 return;
1725 }
1726
1727 if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
1728 GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
1729 }
1730 if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
1731 __ jmp(codegen_->GetLabelOf(successor));
1732 }
1733 }
1734
1735 void LocationsBuilderX86_64::VisitGoto(HGoto* got) {
1736 got->SetLocations(nullptr);
1737 }
1738
1739 void InstructionCodeGeneratorX86_64::VisitGoto(HGoto* got) {
1740 HandleGoto(got, got->GetSuccessor());
1741 }
1742
1743 void LocationsBuilderX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
1744 try_boundary->SetLocations(nullptr);
1745 }
1746
1747 void InstructionCodeGeneratorX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
1748 HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
1749 if (!successor->IsExitBlock()) {
1750 HandleGoto(try_boundary, successor);
1751 }
1752 }
1753
1754 void LocationsBuilderX86_64::VisitExit(HExit* exit) {
1755 exit->SetLocations(nullptr);
1756 }
1757
1758 void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
1759 }
1760
1761 template<class LabelType>
1762 void InstructionCodeGeneratorX86_64::GenerateFPJumps(HCondition* cond,
1763 LabelType* true_label,
1764 LabelType* false_label) {
1765 if (cond->IsFPConditionTrueIfNaN()) {
1766 __ j(kUnordered, true_label);
1767 } else if (cond->IsFPConditionFalseIfNaN()) {
1768 __ j(kUnordered, false_label);
1769 }
1770 __ j(X86_64FPCondition(cond->GetCondition()), true_label);
1771 }
1772
1773 void InstructionCodeGeneratorX86_64::GenerateCompareTest(HCondition* condition) {
1774 LocationSummary* locations = condition->GetLocations();
1775
1776 Location left = locations->InAt(0);
1777 Location right = locations->InAt(1);
1778 DataType::Type type = condition->InputAt(0)->GetType();
1779 switch (type) {
1780 case DataType::Type::kBool:
1781 case DataType::Type::kUint8:
1782 case DataType::Type::kInt8:
1783 case DataType::Type::kUint16:
1784 case DataType::Type::kInt16:
1785 case DataType::Type::kInt32:
1786 case DataType::Type::kReference: {
1787 codegen_->GenerateIntCompare(left, right);
1788 break;
1789 }
1790 case DataType::Type::kInt64: {
1791 codegen_->GenerateLongCompare(left, right);
1792 break;
1793 }
1794 case DataType::Type::kFloat32: {
1795 if (right.IsFpuRegister()) {
1796 __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
1797 } else if (right.IsConstant()) {
1798 __ ucomiss(left.AsFpuRegister<XmmRegister>(),
1799 codegen_->LiteralFloatAddress(
1800 right.GetConstant()->AsFloatConstant()->GetValue()));
1801 } else {
1802 DCHECK(right.IsStackSlot());
1803 __ ucomiss(left.AsFpuRegister<XmmRegister>(),
1804 Address(CpuRegister(RSP), right.GetStackIndex()));
1805 }
1806 break;
1807 }
1808 case DataType::Type::kFloat64: {
1809 if (right.IsFpuRegister()) {
1810 __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
1811 } else if (right.IsConstant()) {
1812 __ ucomisd(left.AsFpuRegister<XmmRegister>(),
1813 codegen_->LiteralDoubleAddress(
1814 right.GetConstant()->AsDoubleConstant()->GetValue()));
1815 } else {
1816 DCHECK(right.IsDoubleStackSlot());
1817 __ ucomisd(left.AsFpuRegister<XmmRegister>(),
1818 Address(CpuRegister(RSP), right.GetStackIndex()));
1819 }
1820 break;
1821 }
1822 default:
1823 LOG(FATAL) << "Unexpected condition type " << type;
1824 }
1825 }
1826
1827 template<class LabelType>
1828 void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* condition,
1829 LabelType* true_target_in,
1830 LabelType* false_target_in) {
1831 // Generated branching requires both targets to be explicit. If either of the
1832 // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead.
1833 LabelType fallthrough_target;
1834 LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
1835 LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
1836
1837 // Generate the comparison to set the CC.
1838 GenerateCompareTest(condition);
1839
1840 // Now generate the correct jump(s).
1841 DataType::Type type = condition->InputAt(0)->GetType();
1842 switch (type) {
1843 case DataType::Type::kInt64: {
1844 __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
1845 break;
1846 }
1847 case DataType::Type::kFloat32: {
1848 GenerateFPJumps(condition, true_target, false_target);
1849 break;
1850 }
1851 case DataType::Type::kFloat64: {
1852 GenerateFPJumps(condition, true_target, false_target);
1853 break;
1854 }
1855 default:
1856 LOG(FATAL) << "Unexpected condition type " << type;
1857 }
1858
1859 if (false_target != &fallthrough_target) {
1860 __ jmp(false_target);
1861 }
1862
1863 if (fallthrough_target.IsLinked()) {
1864 __ Bind(&fallthrough_target);
1865 }
1866 }
1867
1868 static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) {
1869 // Moves may affect the eflags register (moving zero uses xorl), so the eflags can only
1870 // be relied on if the condition is emitted immediately before `branch`. We also cannot
1871 // reuse the eflags for materialized floating-point conditions due to their complex branching.
1872 return cond->IsCondition() &&
1873 cond->GetNext() == branch &&
1874 !DataType::IsFloatingPointType(cond->InputAt(0)->GetType());
1875 }
1876
1877 template<class LabelType>
1878 void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruction,
1879 size_t condition_input_index,
1880 LabelType* true_target,
1881 LabelType* false_target) {
1882 HInstruction* cond = instruction->InputAt(condition_input_index);
1883
1884 if (true_target == nullptr && false_target == nullptr) {
1885 // Nothing to do. The code always falls through.
1886 return;
1887 } else if (cond->IsIntConstant()) {
1888 // Constant condition, statically compared against "true" (integer value 1).
1889 if (cond->AsIntConstant()->IsTrue()) {
1890 if (true_target != nullptr) {
1891 __ jmp(true_target);
1892 }
1893 } else {
1894 DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
1895 if (false_target != nullptr) {
1896 __ jmp(false_target);
1897 }
1898 }
1899 return;
1900 }
1901
1902 // The following code generates these patterns:
1903 // (1) true_target == nullptr && false_target != nullptr
1904 // - opposite condition true => branch to false_target
1905 // (2) true_target != nullptr && false_target == nullptr
1906 // - condition true => branch to true_target
1907 // (3) true_target != nullptr && false_target != nullptr
1908 // - condition true => branch to true_target
1909 // - branch to false_target
1910 if (IsBooleanValueOrMaterializedCondition(cond)) {
1911 if (AreEflagsSetFrom(cond, instruction)) {
1912 if (true_target == nullptr) {
1913 __ j(X86_64IntegerCondition(cond->AsCondition()->GetOppositeCondition()), false_target);
1914 } else {
1915 __ j(X86_64IntegerCondition(cond->AsCondition()->GetCondition()), true_target);
1916 }
1917 } else {
1918 // Materialized condition, compare against 0.
1919 Location lhs = instruction->GetLocations()->InAt(condition_input_index);
1920 if (lhs.IsRegister()) {
1921 __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
1922 } else {
1923 __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0));
1924 }
1925 if (true_target == nullptr) {
1926 __ j(kEqual, false_target);
1927 } else {
1928 __ j(kNotEqual, true_target);
1929 }
1930 }
1931 } else {
1932 // Condition has not been materialized, use its inputs as the
1933 // comparison and its condition as the branch condition.
1934 HCondition* condition = cond->AsCondition();
1935
1936 // If this is a long or FP comparison that has been folded into
1937 // the HCondition, generate the comparison directly.
1938 DataType::Type type = condition->InputAt(0)->GetType();
1939 if (type == DataType::Type::kInt64 || DataType::IsFloatingPointType(type)) {
1940 GenerateCompareTestAndBranch(condition, true_target, false_target);
1941 return;
1942 }
1943
1944 Location lhs = condition->GetLocations()->InAt(0);
1945 Location rhs = condition->GetLocations()->InAt(1);
1946 codegen_->GenerateIntCompare(lhs, rhs);
1947 if (true_target == nullptr) {
1948 __ j(X86_64IntegerCondition(condition->GetOppositeCondition()), false_target);
1949 } else {
1950 __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
1951 }
1952 }
1953
1954 // If neither branch falls through (case 3), the conditional branch to `true_target`
1955 // was already emitted (case 2) and we need to emit a jump to `false_target`.
1956 if (true_target != nullptr && false_target != nullptr) {
1957 __ jmp(false_target);
1958 }
1959 }
1960
1961 void LocationsBuilderX86_64::VisitIf(HIf* if_instr) {
1962 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
1963 if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
1964 locations->SetInAt(0, Location::Any());
1965 }
1966 }
1967
1968 void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
1969 HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
1970 HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
1971 Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
1972 nullptr : codegen_->GetLabelOf(true_successor);
1973 Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
1974 nullptr : codegen_->GetLabelOf(false_successor);
1975 GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
1976 }
1977
1978 void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
1979 LocationSummary* locations = new (GetGraph()->GetAllocator())
1980 LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
1981 InvokeRuntimeCallingConvention calling_convention;
1982 RegisterSet caller_saves = RegisterSet::Empty();
1983 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1984 locations->SetCustomSlowPathCallerSaves(caller_saves);
1985 if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
1986 locations->SetInAt(0, Location::Any());
1987 }
1988 }
1989
1990 void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
1991 SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86_64>(deoptimize);
1992 GenerateTestAndBranch<Label>(deoptimize,
1993 /* condition_input_index= */ 0,
1994 slow_path->GetEntryLabel(),
1995 /* false_target= */ nullptr);
1996 }
1997
1998 void LocationsBuilderX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
1999 LocationSummary* locations = new (GetGraph()->GetAllocator())
2000 LocationSummary(flag, LocationSummary::kNoCall);
2001 locations->SetOut(Location::RequiresRegister());
2002 }
2003
2004 void InstructionCodeGeneratorX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
2005 __ movl(flag->GetLocations()->Out().AsRegister<CpuRegister>(),
2006 Address(CpuRegister(RSP), codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
2007 }
2008
2009 static bool SelectCanUseCMOV(HSelect* select) {
2010 // There are no conditional move instructions for XMMs.
2011 if (DataType::IsFloatingPointType(select->GetType())) {
2012 return false;
2013 }
2014
2015 // A FP condition doesn't generate the single CC that we need.
2016 HInstruction* condition = select->GetCondition();
2017 if (condition->IsCondition() &&
2018 DataType::IsFloatingPointType(condition->InputAt(0)->GetType())) {
2019 return false;
2020 }
2021
2022 // We can generate a CMOV for this Select.
2023 return true;
2024 }
2025
2026 void LocationsBuilderX86_64::VisitSelect(HSelect* select) {
2027 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select);
2028 if (DataType::IsFloatingPointType(select->GetType())) {
2029 locations->SetInAt(0, Location::RequiresFpuRegister());
2030 locations->SetInAt(1, Location::Any());
2031 } else {
2032 locations->SetInAt(0, Location::RequiresRegister());
2033 if (SelectCanUseCMOV(select)) {
2034 if (select->InputAt(1)->IsConstant()) {
2035 locations->SetInAt(1, Location::RequiresRegister());
2036 } else {
2037 locations->SetInAt(1, Location::Any());
2038 }
2039 } else {
2040 locations->SetInAt(1, Location::Any());
2041 }
2042 }
2043 if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
2044 locations->SetInAt(2, Location::RequiresRegister());
2045 }
2046 locations->SetOut(Location::SameAsFirstInput());
2047 }
2048
2049 void InstructionCodeGeneratorX86_64::VisitSelect(HSelect* select) {
2050 LocationSummary* locations = select->GetLocations();
2051 if (SelectCanUseCMOV(select)) {
2052 // If both the condition and the source types are integer, we can generate
2053 // a CMOV to implement Select.
2054 CpuRegister value_false = locations->InAt(0).AsRegister<CpuRegister>();
2055 Location value_true_loc = locations->InAt(1);
2056 DCHECK(locations->InAt(0).Equals(locations->Out()));
2057
2058 HInstruction* select_condition = select->GetCondition();
2059 Condition cond = kNotEqual;
2060
2061 // Figure out how to test the 'condition'.
2062 if (select_condition->IsCondition()) {
2063 HCondition* condition = select_condition->AsCondition();
2064 if (!condition->IsEmittedAtUseSite()) {
2065 // This was a previously materialized condition.
2066 // Can we use the existing condition code?
2067 if (AreEflagsSetFrom(condition, select)) {
2068 // Materialization was the previous instruction. Condition codes are right.
2069 cond = X86_64IntegerCondition(condition->GetCondition());
2070 } else {
2071 // No, we have to recreate the condition code.
2072 CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
2073 __ testl(cond_reg, cond_reg);
2074 }
2075 } else {
2076 GenerateCompareTest(condition);
2077 cond = X86_64IntegerCondition(condition->GetCondition());
2078 }
2079 } else {
2080 // Must be a Boolean condition, which needs to be compared to 0.
2081 CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
2082 __ testl(cond_reg, cond_reg);
2083 }
2084
2085 // If the condition is true, overwrite the output, which already contains false.
2086 // Generate the correct sized CMOV.
2087 bool is_64_bit = DataType::Is64BitType(select->GetType());
2088 if (value_true_loc.IsRegister()) {
2089 __ cmov(cond, value_false, value_true_loc.AsRegister<CpuRegister>(), is_64_bit);
2090 } else {
2091 __ cmov(cond,
2092 value_false,
2093 Address(CpuRegister(RSP), value_true_loc.GetStackIndex()), is_64_bit);
2094 }
2095 } else {
2096 NearLabel false_target;
2097 GenerateTestAndBranch<NearLabel>(select,
2098 /* condition_input_index= */ 2,
2099 /* true_target= */ nullptr,
2100 &false_target);
2101 codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
2102 __ Bind(&false_target);
2103 }
2104 }
2105
2106 void LocationsBuilderX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
2107 new (GetGraph()->GetAllocator()) LocationSummary(info);
2108 }
2109
2110 void InstructionCodeGeneratorX86_64::VisitNativeDebugInfo(HNativeDebugInfo*) {
2111 // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
2112 }
2113
2114 void CodeGeneratorX86_64::IncreaseFrame(size_t adjustment) {
2115 __ subq(CpuRegister(RSP), Immediate(adjustment));
2116 __ cfi().AdjustCFAOffset(adjustment);
2117 }
2118
2119 void CodeGeneratorX86_64::DecreaseFrame(size_t adjustment) {
2120 __ addq(CpuRegister(RSP), Immediate(adjustment));
2121 __ cfi().AdjustCFAOffset(-adjustment);
2122 }
2123
2124 void CodeGeneratorX86_64::GenerateNop() {
2125 __ nop();
2126 }
2127
2128 void LocationsBuilderX86_64::HandleCondition(HCondition* cond) {
2129 LocationSummary* locations =
2130 new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall);
2131 // Handle the long/FP comparisons made in instruction simplification.
2132 switch (cond->InputAt(0)->GetType()) {
2133 case DataType::Type::kInt64:
2134 locations->SetInAt(0, Location::RequiresRegister());
2135 locations->SetInAt(1, Location::Any());
2136 break;
2137 case DataType::Type::kFloat32:
2138 case DataType::Type::kFloat64:
2139 locations->SetInAt(0, Location::RequiresFpuRegister());
2140 locations->SetInAt(1, Location::Any());
2141 break;
2142 default:
2143 locations->SetInAt(0, Location::RequiresRegister());
2144 locations->SetInAt(1, Location::Any());
2145 break;
2146 }
2147 if (!cond->IsEmittedAtUseSite()) {
2148 locations->SetOut(Location::RequiresRegister());
2149 }
2150 }
2151
2152 void InstructionCodeGeneratorX86_64::HandleCondition(HCondition* cond) {
2153 if (cond->IsEmittedAtUseSite()) {
2154 return;
2155 }
2156
2157 LocationSummary* locations = cond->GetLocations();
2158 Location lhs = locations->InAt(0);
2159 Location rhs = locations->InAt(1);
2160 CpuRegister reg = locations->Out().AsRegister<CpuRegister>();
2161 NearLabel true_label, false_label;
2162
2163 switch (cond->InputAt(0)->GetType()) {
2164 default:
2165 // Integer case.
2166
2167 // Clear output register: setcc only sets the low byte.
2168 __ xorl(reg, reg);
2169
2170 codegen_->GenerateIntCompare(lhs, rhs);
2171 __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
2172 return;
2173 case DataType::Type::kInt64:
2174 // Clear output register: setcc only sets the low byte.
2175 __ xorl(reg, reg);
2176
2177 codegen_->GenerateLongCompare(lhs, rhs);
2178 __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
2179 return;
2180 case DataType::Type::kFloat32: {
2181 XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
2182 if (rhs.IsConstant()) {
2183 float value = rhs.GetConstant()->AsFloatConstant()->GetValue();
2184 __ ucomiss(lhs_reg, codegen_->LiteralFloatAddress(value));
2185 } else if (rhs.IsStackSlot()) {
2186 __ ucomiss(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
2187 } else {
2188 __ ucomiss(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
2189 }
2190 GenerateFPJumps(cond, &true_label, &false_label);
2191 break;
2192 }
2193 case DataType::Type::kFloat64: {
2194 XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
2195 if (rhs.IsConstant()) {
2196 double value = rhs.GetConstant()->AsDoubleConstant()->GetValue();
2197 __ ucomisd(lhs_reg, codegen_->LiteralDoubleAddress(value));
2198 } else if (rhs.IsDoubleStackSlot()) {
2199 __ ucomisd(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
2200 } else {
2201 __ ucomisd(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
2202 }
2203 GenerateFPJumps(cond, &true_label, &false_label);
2204 break;
2205 }
2206 }
2207
2208 // Convert the jumps into the result.
2209 NearLabel done_label;
2210
2211 // False case: result = 0.
2212 __ Bind(&false_label);
2213 __ xorl(reg, reg);
2214 __ jmp(&done_label);
2215
2216 // True case: result = 1.
2217 __ Bind(&true_label);
2218 __ movl(reg, Immediate(1));
2219 __ Bind(&done_label);
2220 }
2221
2222 void LocationsBuilderX86_64::VisitEqual(HEqual* comp) {
2223 HandleCondition(comp);
2224 }
2225
2226 void InstructionCodeGeneratorX86_64::VisitEqual(HEqual* comp) {
2227 HandleCondition(comp);
2228 }
2229
2230 void LocationsBuilderX86_64::VisitNotEqual(HNotEqual* comp) {
2231 HandleCondition(comp);
2232 }
2233
2234 void InstructionCodeGeneratorX86_64::VisitNotEqual(HNotEqual* comp) {
2235 HandleCondition(comp);
2236 }
2237
2238 void LocationsBuilderX86_64::VisitLessThan(HLessThan* comp) {
2239 HandleCondition(comp);
2240 }
2241
2242 void InstructionCodeGeneratorX86_64::VisitLessThan(HLessThan* comp) {
2243 HandleCondition(comp);
2244 }
2245
2246 void LocationsBuilderX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2247 HandleCondition(comp);
2248 }
2249
2250 void InstructionCodeGeneratorX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2251 HandleCondition(comp);
2252 }
2253
2254 void LocationsBuilderX86_64::VisitGreaterThan(HGreaterThan* comp) {
2255 HandleCondition(comp);
2256 }
2257
2258 void InstructionCodeGeneratorX86_64::VisitGreaterThan(HGreaterThan* comp) {
2259 HandleCondition(comp);
2260 }
2261
2262 void LocationsBuilderX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2263 HandleCondition(comp);
2264 }
2265
2266 void InstructionCodeGeneratorX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2267 HandleCondition(comp);
2268 }
2269
2270 void LocationsBuilderX86_64::VisitBelow(HBelow* comp) {
2271 HandleCondition(comp);
2272 }
2273
2274 void InstructionCodeGeneratorX86_64::VisitBelow(HBelow* comp) {
2275 HandleCondition(comp);
2276 }
2277
2278 void LocationsBuilderX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
2279 HandleCondition(comp);
2280 }
2281
2282 void InstructionCodeGeneratorX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
2283 HandleCondition(comp);
2284 }
2285
2286 void LocationsBuilderX86_64::VisitAbove(HAbove* comp) {
2287 HandleCondition(comp);
2288 }
2289
2290 void InstructionCodeGeneratorX86_64::VisitAbove(HAbove* comp) {
2291 HandleCondition(comp);
2292 }
2293
2294 void LocationsBuilderX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
2295 HandleCondition(comp);
2296 }
2297
2298 void InstructionCodeGeneratorX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
2299 HandleCondition(comp);
2300 }
2301
2302 void LocationsBuilderX86_64::VisitCompare(HCompare* compare) {
2303 LocationSummary* locations =
2304 new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
2305 switch (compare->InputAt(0)->GetType()) {
2306 case DataType::Type::kBool:
2307 case DataType::Type::kUint8:
2308 case DataType::Type::kInt8:
2309 case DataType::Type::kUint16:
2310 case DataType::Type::kInt16:
2311 case DataType::Type::kInt32:
2312 case DataType::Type::kInt64: {
2313 locations->SetInAt(0, Location::RequiresRegister());
2314 locations->SetInAt(1, Location::Any());
2315 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2316 break;
2317 }
2318 case DataType::Type::kFloat32:
2319 case DataType::Type::kFloat64: {
2320 locations->SetInAt(0, Location::RequiresFpuRegister());
2321 locations->SetInAt(1, Location::Any());
2322 locations->SetOut(Location::RequiresRegister());
2323 break;
2324 }
2325 default:
2326 LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
2327 }
2328 }
2329
2330 void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) {
2331 LocationSummary* locations = compare->GetLocations();
2332 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
2333 Location left = locations->InAt(0);
2334 Location right = locations->InAt(1);
2335
2336 NearLabel less, greater, done;
2337 DataType::Type type = compare->InputAt(0)->GetType();
2338 Condition less_cond = kLess;
2339
2340 switch (type) {
2341 case DataType::Type::kBool:
2342 case DataType::Type::kUint8:
2343 case DataType::Type::kInt8:
2344 case DataType::Type::kUint16:
2345 case DataType::Type::kInt16:
2346 case DataType::Type::kInt32: {
2347 codegen_->GenerateIntCompare(left, right);
2348 break;
2349 }
2350 case DataType::Type::kInt64: {
2351 codegen_->GenerateLongCompare(left, right);
2352 break;
2353 }
2354 case DataType::Type::kFloat32: {
2355 XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
2356 if (right.IsConstant()) {
2357 float value = right.GetConstant()->AsFloatConstant()->GetValue();
2358 __ ucomiss(left_reg, codegen_->LiteralFloatAddress(value));
2359 } else if (right.IsStackSlot()) {
2360 __ ucomiss(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
2361 } else {
2362 __ ucomiss(left_reg, right.AsFpuRegister<XmmRegister>());
2363 }
2364 __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
2365 less_cond = kBelow; // ucomis{s,d} sets CF
2366 break;
2367 }
2368 case DataType::Type::kFloat64: {
2369 XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
2370 if (right.IsConstant()) {
2371 double value = right.GetConstant()->AsDoubleConstant()->GetValue();
2372 __ ucomisd(left_reg, codegen_->LiteralDoubleAddress(value));
2373 } else if (right.IsDoubleStackSlot()) {
2374 __ ucomisd(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
2375 } else {
2376 __ ucomisd(left_reg, right.AsFpuRegister<XmmRegister>());
2377 }
2378 __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
2379 less_cond = kBelow; // ucomis{s,d} sets CF
2380 break;
2381 }
2382 default:
2383 LOG(FATAL) << "Unexpected compare type " << type;
2384 }
2385
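  // Materialize the three-way result: 0 when equal, 1 when greater, -1 when less. For FP
  // compares, a NaN operand has already been routed to the greater or less label above
  // according to the compare's bias.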
2386 __ movl(out, Immediate(0));
2387 __ j(kEqual, &done);
2388 __ j(less_cond, &less);
2389
2390 __ Bind(&greater);
2391 __ movl(out, Immediate(1));
2392 __ jmp(&done);
2393
2394 __ Bind(&less);
2395 __ movl(out, Immediate(-1));
2396
2397 __ Bind(&done);
2398 }
2399
2400 void LocationsBuilderX86_64::VisitIntConstant(HIntConstant* constant) {
2401 LocationSummary* locations =
2402 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2403 locations->SetOut(Location::ConstantLocation(constant));
2404 }
2405
2406 void InstructionCodeGeneratorX86_64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
2407 // Will be generated at use site.
2408 }
2409
2410 void LocationsBuilderX86_64::VisitNullConstant(HNullConstant* constant) {
2411 LocationSummary* locations =
2412 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2413 locations->SetOut(Location::ConstantLocation(constant));
2414 }
2415
2416 void InstructionCodeGeneratorX86_64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
2417 // Will be generated at use site.
2418 }
2419
2420 void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) {
2421 LocationSummary* locations =
2422 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2423 locations->SetOut(Location::ConstantLocation(constant));
2424 }
2425
2426 void InstructionCodeGeneratorX86_64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
2427 // Will be generated at use site.
2428 }
2429
2430 void LocationsBuilderX86_64::VisitFloatConstant(HFloatConstant* constant) {
2431 LocationSummary* locations =
2432 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2433 locations->SetOut(Location::ConstantLocation(constant));
2434 }
2435
2436 void InstructionCodeGeneratorX86_64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
2437 // Will be generated at use site.
2438 }
2439
2440 void LocationsBuilderX86_64::VisitDoubleConstant(HDoubleConstant* constant) {
2441 LocationSummary* locations =
2442 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2443 locations->SetOut(Location::ConstantLocation(constant));
2444 }
2445
2446 void InstructionCodeGeneratorX86_64::VisitDoubleConstant(
2447 HDoubleConstant* constant ATTRIBUTE_UNUSED) {
2448 // Will be generated at use site.
2449 }
2450
2451 void LocationsBuilderX86_64::VisitConstructorFence(HConstructorFence* constructor_fence) {
2452 constructor_fence->SetLocations(nullptr);
2453 }
2454
2455 void InstructionCodeGeneratorX86_64::VisitConstructorFence(
2456 HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
2457 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2458 }
2459
2460 void LocationsBuilderX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2461 memory_barrier->SetLocations(nullptr);
2462 }
2463
2464 void InstructionCodeGeneratorX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2465 codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
2466 }
2467
2468 void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) {
2469 ret->SetLocations(nullptr);
2470 }
2471
2472 void InstructionCodeGeneratorX86_64::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) {
2473 codegen_->GenerateFrameExit();
2474 }
2475
2476 void LocationsBuilderX86_64::VisitReturn(HReturn* ret) {
2477 LocationSummary* locations =
2478 new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall);
2479 switch (ret->InputAt(0)->GetType()) {
2480 case DataType::Type::kReference:
2481 case DataType::Type::kBool:
2482 case DataType::Type::kUint8:
2483 case DataType::Type::kInt8:
2484 case DataType::Type::kUint16:
2485 case DataType::Type::kInt16:
2486 case DataType::Type::kInt32:
2487 case DataType::Type::kInt64:
2488 locations->SetInAt(0, Location::RegisterLocation(RAX));
2489 break;
2490
2491 case DataType::Type::kFloat32:
2492 case DataType::Type::kFloat64:
2493 locations->SetInAt(0, Location::FpuRegisterLocation(XMM0));
2494 break;
2495
2496 default:
2497 LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
2498 }
2499 }
2500
2501 void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) {
2502 switch (ret->InputAt(0)->GetType()) {
2503 case DataType::Type::kReference:
2504 case DataType::Type::kBool:
2505 case DataType::Type::kUint8:
2506 case DataType::Type::kInt8:
2507 case DataType::Type::kUint16:
2508 case DataType::Type::kInt16:
2509 case DataType::Type::kInt32:
2510 case DataType::Type::kInt64:
2511 DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<CpuRegister>().AsRegister(), RAX);
2512 break;
2513
2514 case DataType::Type::kFloat32: {
2515 DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(),
2516 XMM0);
2517 // To simplify callers of an OSR method, we put the return value in both
2518 // the floating-point and the core register.
2519 if (GetGraph()->IsCompilingOsr()) {
2520 __ movd(CpuRegister(RAX), XmmRegister(XMM0), /* is64bit= */ false);
2521 }
2522 break;
2523 }
2524 case DataType::Type::kFloat64: {
2525 DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(),
2526 XMM0);
2527 // To simplify callers of an OSR method, we put the return value in both
2528 // the floating-point and the core register.
2529 if (GetGraph()->IsCompilingOsr()) {
2530 __ movd(CpuRegister(RAX), XmmRegister(XMM0), /* is64bit= */ true);
2531 }
2532 break;
2533 }
2534
2535 default:
2536 LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
2537 }
2538 codegen_->GenerateFrameExit();
2539 }
2540
2541 Location InvokeDexCallingConventionVisitorX86_64::GetReturnLocation(DataType::Type type) const {
2542 switch (type) {
2543 case DataType::Type::kReference:
2544 case DataType::Type::kBool:
2545 case DataType::Type::kUint8:
2546 case DataType::Type::kInt8:
2547 case DataType::Type::kUint16:
2548 case DataType::Type::kInt16:
2549 case DataType::Type::kUint32:
2550 case DataType::Type::kInt32:
2551 case DataType::Type::kUint64:
2552 case DataType::Type::kInt64:
2553 return Location::RegisterLocation(RAX);
2554
2555 case DataType::Type::kVoid:
2556 return Location::NoLocation();
2557
2558 case DataType::Type::kFloat64:
2559 case DataType::Type::kFloat32:
2560 return Location::FpuRegisterLocation(XMM0);
2561 }
2562
2563 UNREACHABLE();
2564 }
2565
2566 Location InvokeDexCallingConventionVisitorX86_64::GetMethodLocation() const {
2567 return Location::RegisterLocation(kMethodRegisterArgument);
2568 }
2569
2570 Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(DataType::Type type) {
2571 switch (type) {
2572 case DataType::Type::kReference:
2573 case DataType::Type::kBool:
2574 case DataType::Type::kUint8:
2575 case DataType::Type::kInt8:
2576 case DataType::Type::kUint16:
2577 case DataType::Type::kInt16:
2578 case DataType::Type::kInt32: {
2579 uint32_t index = gp_index_++;
2580 stack_index_++;
2581 if (index < calling_convention.GetNumberOfRegisters()) {
2582 return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
2583 } else {
2584 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
2585 }
2586 }
2587
2588 case DataType::Type::kInt64: {
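      // A 64-bit argument occupies two 32-bit vreg slots in the managed calling convention,
      // so the stack index always advances by two, even when the value ends up in a single
      // 64-bit register.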
2589 uint32_t index = gp_index_;
2590 stack_index_ += 2;
2591 if (index < calling_convention.GetNumberOfRegisters()) {
2592 gp_index_ += 1;
2593 return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
2594 } else {
2595 gp_index_ += 2;
2596 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
2597 }
2598 }
2599
2600 case DataType::Type::kFloat32: {
2601 uint32_t index = float_index_++;
2602 stack_index_++;
2603 if (index < calling_convention.GetNumberOfFpuRegisters()) {
2604 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
2605 } else {
2606 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
2607 }
2608 }
2609
2610 case DataType::Type::kFloat64: {
2611 uint32_t index = float_index_++;
2612 stack_index_ += 2;
2613 if (index < calling_convention.GetNumberOfFpuRegisters()) {
2614 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
2615 } else {
2616 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
2617 }
2618 }
2619
2620 case DataType::Type::kUint32:
2621 case DataType::Type::kUint64:
2622 case DataType::Type::kVoid:
2623 LOG(FATAL) << "Unexpected parameter type " << type;
2624 UNREACHABLE();
2625 }
2626 return Location::NoLocation();
2627 }
2628
2629 Location CriticalNativeCallingConventionVisitorX86_64::GetNextLocation(DataType::Type type) {
2630 DCHECK_NE(type, DataType::Type::kReference);
2631
2632 Location location = Location::NoLocation();
2633 if (DataType::IsFloatingPointType(type)) {
2634 if (fpr_index_ < kParameterFloatRegistersLength) {
2635 location = Location::FpuRegisterLocation(kParameterFloatRegisters[fpr_index_]);
2636 ++fpr_index_;
2637 }
2638 } else {
2639 // Native ABI uses the same registers as managed, except that the method register RDI
2640 // is a normal argument.
2641 if (gpr_index_ < 1u + kParameterCoreRegistersLength) {
2642 location = Location::RegisterLocation(
2643 gpr_index_ == 0u ? RDI : kParameterCoreRegisters[gpr_index_ - 1u]);
2644 ++gpr_index_;
2645 }
2646 }
2647 if (location.IsInvalid()) {
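// No register is available, so the argument is passed on the native stack. Each stack
// argument occupies a full 8-byte slot (kFramePointerSize) in the x86-64 native ABI,
// even for 32-bit types.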
2648 if (DataType::Is64BitType(type)) {
2649 location = Location::DoubleStackSlot(stack_offset_);
2650 } else {
2651 location = Location::StackSlot(stack_offset_);
2652 }
2653 stack_offset_ += kFramePointerSize;
2654
2655 if (for_register_allocation_) {
2656 location = Location::Any();
2657 }
2658 }
2659 return location;
2660 }
2661
2662 Location CriticalNativeCallingConventionVisitorX86_64::GetReturnLocation(DataType::Type type)
2663 const {
2664 // We perform conversion to the managed ABI return register after the call if needed.
2665 InvokeDexCallingConventionVisitorX86_64 dex_calling_convention;
2666 return dex_calling_convention.GetReturnLocation(type);
2667 }
2668
2669 Location CriticalNativeCallingConventionVisitorX86_64::GetMethodLocation() const {
2670 // Pass the method in the hidden argument RAX.
2671 return Location::RegisterLocation(RAX);
2672 }
2673
2674 void LocationsBuilderX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2675 // The trampoline uses the same calling convention as dex calling conventions,
2676 // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
2677 // the method_idx.
2678 HandleInvoke(invoke);
2679 }
2680
2681 void InstructionCodeGeneratorX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2682 codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
2683 }
2684
2685 void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2686 // Explicit clinit checks triggered by static invokes must have been pruned by
2687 // art::PrepareForRegisterAllocation.
2688 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2689
2690 IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
2691 if (intrinsic.TryDispatch(invoke)) {
2692 return;
2693 }
2694
2695 if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
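// @CriticalNative calls use the native ABI directly: such methods take no JNIEnv* or
// jclass arguments, so their locations are built with the critical native calling
// convention visitor instead of the managed one.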
2696 CriticalNativeCallingConventionVisitorX86_64 calling_convention_visitor(
2697 /*for_register_allocation=*/ true);
2698 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
2699 CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(invoke->GetLocations());
2700 } else {
2701 HandleInvoke(invoke);
2702 }
2703 }
2704
2705 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
2706 if (invoke->GetLocations()->Intrinsified()) {
2707 IntrinsicCodeGeneratorX86_64 intrinsic(codegen);
2708 intrinsic.Dispatch(invoke);
2709 return true;
2710 }
2711 return false;
2712 }
2713
2714 void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2715 // Explicit clinit checks triggered by static invokes must have been pruned by
2716 // art::PrepareForRegisterAllocation.
2717 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2718
2719 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2720 return;
2721 }
2722
2723 LocationSummary* locations = invoke->GetLocations();
2724 codegen_->GenerateStaticOrDirectCall(
2725 invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
2726 }
2727
2728 void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) {
2729 InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
2730 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
2731 }
2732
2733 void LocationsBuilderX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2734 IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
2735 if (intrinsic.TryDispatch(invoke)) {
2736 return;
2737 }
2738
2739 HandleInvoke(invoke);
2740 }
2741
2742 void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2743 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2744 return;
2745 }
2746
2747 codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
2748 DCHECK(!codegen_->IsLeafMethod());
2749 }
2750
2751 void LocationsBuilderX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
2752 HandleInvoke(invoke);
2753 // Add the hidden argument.
2754 if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
2755 invoke->GetLocations()->SetInAt(invoke->GetNumberOfArguments() - 1,
2756 Location::RegisterLocation(RAX));
2757 }
2758 invoke->GetLocations()->AddTemp(Location::RegisterLocation(RAX));
2759 }
2760
2761 void CodeGeneratorX86_64::MaybeGenerateInlineCacheCheck(HInstruction* instruction,
2762 CpuRegister klass) {
2763 DCHECK_EQ(RDI, klass.AsRegister());
2764 // We know the destination of an intrinsic, so there is no need to record
2765 // inline caches.
2766 if (!instruction->GetLocations()->Intrinsified() &&
2767 GetGraph()->IsCompilingBaseline() &&
2768 !Runtime::Current()->IsAotCompiler()) {
2769 ScopedProfilingInfoUse spiu(
2770 Runtime::Current()->GetJit(), GetGraph()->GetArtMethod(), Thread::Current());
2771 ProfilingInfo* info = spiu.GetProfilingInfo();
2772 if (info != nullptr) {
2773 InlineCache* cache = info->GetInlineCache(instruction->GetDexPc());
2774 uint64_t address = reinterpret_cast64<uint64_t>(cache);
2775 NearLabel done;
2776 __ movq(CpuRegister(TMP), Immediate(address));
2777 // Fast path for a monomorphic cache.
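// If the cached class does not match the receiver's class (including the case of an
// initially empty cache), fall through to the kQuickUpdateInlineCache entrypoint so
// the runtime can record the receiver's class for this dex pc.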
2778 __ cmpl(Address(CpuRegister(TMP), InlineCache::ClassesOffset().Int32Value()), klass);
2779 __ j(kEqual, &done);
2780 GenerateInvokeRuntime(
2781 GetThreadOffset<kX86_64PointerSize>(kQuickUpdateInlineCache).Int32Value());
2782 __ Bind(&done);
2783 }
2784 }
2785 }
2786
2787 void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
2788 // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
2789 LocationSummary* locations = invoke->GetLocations();
2790 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
2791 Location receiver = locations->InAt(0);
2792 size_t class_offset = mirror::Object::ClassOffset().SizeValue();
2793
2794 if (receiver.IsStackSlot()) {
2795 __ movl(temp, Address(CpuRegister(RSP), receiver.GetStackIndex()));
2796 // /* HeapReference<Class> */ temp = temp->klass_
2797 __ movl(temp, Address(temp, class_offset));
2798 } else {
2799 // /* HeapReference<Class> */ temp = receiver->klass_
2800 __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset));
2801 }
2802 codegen_->MaybeRecordImplicitNullCheck(invoke);
2803 // Instead of simply (possibly) unpoisoning `temp` here, we should
2804 // emit a read barrier for the previous class reference load.
2805 // However this is not required in practice, as this is an
2806 // intermediate/temporary reference and because the current
2807 // concurrent copying collector keeps the from-space memory
2808 // intact/accessible until the end of the marking phase (a future
2809 // collector may not preserve this).
2810 __ MaybeUnpoisonHeapReference(temp);
2811
2812 codegen_->MaybeGenerateInlineCacheCheck(invoke, temp);
2813
2814 if (invoke->GetHiddenArgumentLoadKind() != MethodLoadKind::kRecursive &&
2815 invoke->GetHiddenArgumentLoadKind() != MethodLoadKind::kRuntimeCall) {
2816 Location hidden_reg = locations->GetTemp(1);
2817 // Set the hidden argument. It is safe to do this here, as RAX
2818 // won't be modified thereafter, before the `call` instruction.
2819 // We also do it after MaybeGenerateInlineCacheCheck, which may use RAX.
2820 DCHECK_EQ(RAX, hidden_reg.AsRegister<Register>());
2821 codegen_->LoadMethod(invoke->GetHiddenArgumentLoadKind(), hidden_reg, invoke);
2822 }
2823
2824 // temp = temp->GetAddressOfIMT()
2825 __ movq(temp,
2826 Address(temp, mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
2828 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
2829 invoke->GetImtIndex(), kX86_64PointerSize));
2830 // temp = temp->GetImtEntryAt(method_offset);
2831 __ movq(temp, Address(temp, method_offset));
2832 if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRuntimeCall) {
2833 // We pass the method from the IMT in case of a conflict. This will ensure
2834 // we go into the runtime to resolve the actual method.
2835 Location hidden_reg = locations->GetTemp(1);
2836 __ movq(hidden_reg.AsRegister<CpuRegister>(), temp);
2837 }
2838 // call temp->GetEntryPoint();
2839 __ call(Address(
2840 temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64PointerSize).SizeValue()));
2841
2842 DCHECK(!codegen_->IsLeafMethod());
2843 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2844 }
2845
2846 void LocationsBuilderX86_64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2847 IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
2848 if (intrinsic.TryDispatch(invoke)) {
2849 return;
2850 }
2851 HandleInvoke(invoke);
2852 }
2853
2854 void InstructionCodeGeneratorX86_64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2855 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2856 return;
2857 }
2858 codegen_->GenerateInvokePolymorphicCall(invoke);
2859 }
2860
2861 void LocationsBuilderX86_64::VisitInvokeCustom(HInvokeCustom* invoke) {
2862 HandleInvoke(invoke);
2863 }
2864
2865 void InstructionCodeGeneratorX86_64::VisitInvokeCustom(HInvokeCustom* invoke) {
2866 codegen_->GenerateInvokeCustomCall(invoke);
2867 }
2868
2869 void LocationsBuilderX86_64::VisitNeg(HNeg* neg) {
2870 LocationSummary* locations =
2871 new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
2872 switch (neg->GetResultType()) {
2873 case DataType::Type::kInt32:
2874 case DataType::Type::kInt64:
2875 locations->SetInAt(0, Location::RequiresRegister());
2876 locations->SetOut(Location::SameAsFirstInput());
2877 break;
2878
2879 case DataType::Type::kFloat32:
2880 case DataType::Type::kFloat64:
2881 locations->SetInAt(0, Location::RequiresFpuRegister());
2882 locations->SetOut(Location::SameAsFirstInput());
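// The FP temp will hold the sign-bit mask loaded from the constant area
// (see the xorps/xorpd sequence in the code generator below).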
2883 locations->AddTemp(Location::RequiresFpuRegister());
2884 break;
2885
2886 default:
2887 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2888 }
2889 }
2890
2891 void InstructionCodeGeneratorX86_64::VisitNeg(HNeg* neg) {
2892 LocationSummary* locations = neg->GetLocations();
2893 Location out = locations->Out();
2894 Location in = locations->InAt(0);
2895 switch (neg->GetResultType()) {
2896 case DataType::Type::kInt32:
2897 DCHECK(in.IsRegister());
2898 DCHECK(in.Equals(out));
2899 __ negl(out.AsRegister<CpuRegister>());
2900 break;
2901
2902 case DataType::Type::kInt64:
2903 DCHECK(in.IsRegister());
2904 DCHECK(in.Equals(out));
2905 __ negq(out.AsRegister<CpuRegister>());
2906 break;
2907
2908 case DataType::Type::kFloat32: {
2909 DCHECK(in.Equals(out));
2910 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2911 // Implement float negation with an exclusive or with value
2912 // 0x80000000 (mask for bit 31, representing the sign of a
2913 // single-precision floating-point number).
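// For example, negating 2.0f (bit pattern 0x40000000) XORs in the mask and yields
// 0xC0000000, which is -2.0f; only the sign bit changes, so zeros and NaNs are
// handled correctly (0.0f becomes -0.0f).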
2914 __ movss(mask, codegen_->LiteralInt32Address(0x80000000));
2915 __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
2916 break;
2917 }
2918
2919 case DataType::Type::kFloat64: {
2920 DCHECK(in.Equals(out));
2921 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2922 // Implement double negation with an exclusive or with value
2923 // 0x8000000000000000 (mask for bit 63, representing the sign of
2924 // a double-precision floating-point number).
2925 __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000)));
2926 __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
2927 break;
2928 }
2929
2930 default:
2931 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2932 }
2933 }
2934
2935 void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) {
2936 LocationSummary* locations =
2937 new (GetGraph()->GetAllocator()) LocationSummary(conversion, LocationSummary::kNoCall);
2938 DataType::Type result_type = conversion->GetResultType();
2939 DataType::Type input_type = conversion->GetInputType();
2940 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
2941 << input_type << " -> " << result_type;
2942
2943 switch (result_type) {
2944 case DataType::Type::kUint8:
2945 case DataType::Type::kInt8:
2946 case DataType::Type::kUint16:
2947 case DataType::Type::kInt16:
2948 DCHECK(DataType::IsIntegralType(input_type)) << input_type;
2949 locations->SetInAt(0, Location::Any());
2950 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2951 break;
2952
2953 case DataType::Type::kInt32:
2954 switch (input_type) {
2955 case DataType::Type::kInt64:
2956 locations->SetInAt(0, Location::Any());
2957 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2958 break;
2959
2960 case DataType::Type::kFloat32:
2961 locations->SetInAt(0, Location::RequiresFpuRegister());
2962 locations->SetOut(Location::RequiresRegister());
2963 break;
2964
2965 case DataType::Type::kFloat64:
2966 locations->SetInAt(0, Location::RequiresFpuRegister());
2967 locations->SetOut(Location::RequiresRegister());
2968 break;
2969
2970 default:
2971 LOG(FATAL) << "Unexpected type conversion from " << input_type
2972 << " to " << result_type;
2973 }
2974 break;
2975
2976 case DataType::Type::kInt64:
2977 switch (input_type) {
2978 case DataType::Type::kBool:
2979 case DataType::Type::kUint8:
2980 case DataType::Type::kInt8:
2981 case DataType::Type::kUint16:
2982 case DataType::Type::kInt16:
2983 case DataType::Type::kInt32:
2984 // TODO: We would benefit from a (to-be-implemented)
2985 // Location::RegisterOrStackSlot requirement for this input.
2986 locations->SetInAt(0, Location::RequiresRegister());
2987 locations->SetOut(Location::RequiresRegister());
2988 break;
2989
2990 case DataType::Type::kFloat32:
2991 locations->SetInAt(0, Location::RequiresFpuRegister());
2992 locations->SetOut(Location::RequiresRegister());
2993 break;
2994
2995 case DataType::Type::kFloat64:
2996 locations->SetInAt(0, Location::RequiresFpuRegister());
2997 locations->SetOut(Location::RequiresRegister());
2998 break;
2999
3000 default:
3001 LOG(FATAL) << "Unexpected type conversion from " << input_type
3002 << " to " << result_type;
3003 }
3004 break;
3005
3006 case DataType::Type::kFloat32:
3007 switch (input_type) {
3008 case DataType::Type::kBool:
3009 case DataType::Type::kUint8:
3010 case DataType::Type::kInt8:
3011 case DataType::Type::kUint16:
3012 case DataType::Type::kInt16:
3013 case DataType::Type::kInt32:
3014 locations->SetInAt(0, Location::Any());
3015 locations->SetOut(Location::RequiresFpuRegister());
3016 break;
3017
3018 case DataType::Type::kInt64:
3019 locations->SetInAt(0, Location::Any());
3020 locations->SetOut(Location::RequiresFpuRegister());
3021 break;
3022
3023 case DataType::Type::kFloat64:
3024 locations->SetInAt(0, Location::Any());
3025 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3026 break;
3027
3028 default:
3029 LOG(FATAL) << "Unexpected type conversion from " << input_type
3030 << " to " << result_type;
3031 }
3032 break;
3033
3034 case DataType::Type::kFloat64:
3035 switch (input_type) {
3036 case DataType::Type::kBool:
3037 case DataType::Type::kUint8:
3038 case DataType::Type::kInt8:
3039 case DataType::Type::kUint16:
3040 case DataType::Type::kInt16:
3041 case DataType::Type::kInt32:
3042 locations->SetInAt(0, Location::Any());
3043 locations->SetOut(Location::RequiresFpuRegister());
3044 break;
3045
3046 case DataType::Type::kInt64:
3047 locations->SetInAt(0, Location::Any());
3048 locations->SetOut(Location::RequiresFpuRegister());
3049 break;
3050
3051 case DataType::Type::kFloat32:
3052 locations->SetInAt(0, Location::Any());
3053 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3054 break;
3055
3056 default:
3057 LOG(FATAL) << "Unexpected type conversion from " << input_type
3058 << " to " << result_type;
3059 }
3060 break;
3061
3062 default:
3063 LOG(FATAL) << "Unexpected type conversion from " << input_type
3064 << " to " << result_type;
3065 }
3066 }
3067
3068 void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conversion) {
3069 LocationSummary* locations = conversion->GetLocations();
3070 Location out = locations->Out();
3071 Location in = locations->InAt(0);
3072 DataType::Type result_type = conversion->GetResultType();
3073 DataType::Type input_type = conversion->GetInputType();
3074 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
3075 << input_type << " -> " << result_type;
3076 switch (result_type) {
3077 case DataType::Type::kUint8:
3078 switch (input_type) {
3079 case DataType::Type::kInt8:
3080 case DataType::Type::kUint16:
3081 case DataType::Type::kInt16:
3082 case DataType::Type::kInt32:
3083 case DataType::Type::kInt64:
3084 if (in.IsRegister()) {
3085 __ movzxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3086 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
3087 __ movzxb(out.AsRegister<CpuRegister>(),
3088 Address(CpuRegister(RSP), in.GetStackIndex()));
3089 } else {
3090 __ movl(out.AsRegister<CpuRegister>(),
3091 Immediate(static_cast<uint8_t>(Int64FromConstant(in.GetConstant()))));
3092 }
3093 break;
3094
3095 default:
3096 LOG(FATAL) << "Unexpected type conversion from " << input_type
3097 << " to " << result_type;
3098 }
3099 break;
3100
3101 case DataType::Type::kInt8:
3102 switch (input_type) {
3103 case DataType::Type::kUint8:
3104 case DataType::Type::kUint16:
3105 case DataType::Type::kInt16:
3106 case DataType::Type::kInt32:
3107 case DataType::Type::kInt64:
3108 if (in.IsRegister()) {
3109 __ movsxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3110 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
3111 __ movsxb(out.AsRegister<CpuRegister>(),
3112 Address(CpuRegister(RSP), in.GetStackIndex()));
3113 } else {
3114 __ movl(out.AsRegister<CpuRegister>(),
3115 Immediate(static_cast<int8_t>(Int64FromConstant(in.GetConstant()))));
3116 }
3117 break;
3118
3119 default:
3120 LOG(FATAL) << "Unexpected type conversion from " << input_type
3121 << " to " << result_type;
3122 }
3123 break;
3124
3125 case DataType::Type::kUint16:
3126 switch (input_type) {
3127 case DataType::Type::kInt8:
3128 case DataType::Type::kInt16:
3129 case DataType::Type::kInt32:
3130 case DataType::Type::kInt64:
3131 if (in.IsRegister()) {
3132 __ movzxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3133 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
3134 __ movzxw(out.AsRegister<CpuRegister>(),
3135 Address(CpuRegister(RSP), in.GetStackIndex()));
3136 } else {
3137 __ movl(out.AsRegister<CpuRegister>(),
3138 Immediate(static_cast<uint16_t>(Int64FromConstant(in.GetConstant()))));
3139 }
3140 break;
3141
3142 default:
3143 LOG(FATAL) << "Unexpected type conversion from " << input_type
3144 << " to " << result_type;
3145 }
3146 break;
3147
3148 case DataType::Type::kInt16:
3149 switch (input_type) {
3150 case DataType::Type::kUint16:
3151 case DataType::Type::kInt32:
3152 case DataType::Type::kInt64:
3153 if (in.IsRegister()) {
3154 __ movsxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3155 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
3156 __ movsxw(out.AsRegister<CpuRegister>(),
3157 Address(CpuRegister(RSP), in.GetStackIndex()));
3158 } else {
3159 __ movl(out.AsRegister<CpuRegister>(),
3160 Immediate(static_cast<int16_t>(Int64FromConstant(in.GetConstant()))));
3161 }
3162 break;
3163
3164 default:
3165 LOG(FATAL) << "Unexpected type conversion from " << input_type
3166 << " to " << result_type;
3167 }
3168 break;
3169
3170 case DataType::Type::kInt32:
3171 switch (input_type) {
3172 case DataType::Type::kInt64:
3173 if (in.IsRegister()) {
3174 __ movl(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3175 } else if (in.IsDoubleStackSlot()) {
3176 __ movl(out.AsRegister<CpuRegister>(),
3177 Address(CpuRegister(RSP), in.GetStackIndex()));
3178 } else {
3179 DCHECK(in.IsConstant());
3180 DCHECK(in.GetConstant()->IsLongConstant());
3181 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3182 __ movl(out.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
3183 }
3184 break;
3185
3186 case DataType::Type::kFloat32: {
3187 XmmRegister input = in.AsFpuRegister<XmmRegister>();
3188 CpuRegister output = out.AsRegister<CpuRegister>();
3189 NearLabel done, nan;
3190
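// Java f2i semantics: NaN converts to 0, values >= 2^31 to INT_MAX and values
// <= -2^31 to INT_MIN. Only the first two cases need explicit handling below; for
// inputs below INT_MIN, cvttss2si already produces 0x80000000 == INT_MIN.
// E.g. 3.7f -> 3, -3.7f -> -3, 3e9f -> 2147483647, -3e9f -> -2147483648, NaN -> 0.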
3191 __ movl(output, Immediate(kPrimIntMax));
3192 // if input >= (float)INT_MAX goto done
3193 __ comiss(input, codegen_->LiteralFloatAddress(static_cast<float>(kPrimIntMax)));
3194 __ j(kAboveEqual, &done);
3195 // if input == NaN goto nan
3196 __ j(kUnordered, &nan);
3197 // output = float-to-int-truncate(input)
3198 __ cvttss2si(output, input, false);
3199 __ jmp(&done);
3200 __ Bind(&nan);
3201 // output = 0
3202 __ xorl(output, output);
3203 __ Bind(&done);
3204 break;
3205 }
3206
3207 case DataType::Type::kFloat64: {
3208 XmmRegister input = in.AsFpuRegister<XmmRegister>();
3209 CpuRegister output = out.AsRegister<CpuRegister>();
3210 NearLabel done, nan;
3211
3212 __ movl(output, Immediate(kPrimIntMax));
3213 // if input >= (double)INT_MAX goto done
3214 __ comisd(input, codegen_->LiteralDoubleAddress(kPrimIntMax));
3215 __ j(kAboveEqual, &done);
3216 // if input == NaN goto nan
3217 __ j(kUnordered, &nan);
3218 // output = double-to-int-truncate(input)
3219 __ cvttsd2si(output, input);
3220 __ jmp(&done);
3221 __ Bind(&nan);
3222 // output = 0
3223 __ xorl(output, output);
3224 __ Bind(&done);
3225 break;
3226 }
3227
3228 default:
3229 LOG(FATAL) << "Unexpected type conversion from " << input_type
3230 << " to " << result_type;
3231 }
3232 break;
3233
3234 case DataType::Type::kInt64:
3235 DCHECK(out.IsRegister());
3236 switch (input_type) {
3237 case DataType::Type::kBool:
3238 case DataType::Type::kUint8:
3239 case DataType::Type::kInt8:
3240 case DataType::Type::kUint16:
3241 case DataType::Type::kInt16:
3242 case DataType::Type::kInt32:
3243 DCHECK(in.IsRegister());
3244 __ movsxd(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3245 break;
3246
3247 case DataType::Type::kFloat32: {
3248 XmmRegister input = in.AsFpuRegister<XmmRegister>();
3249 CpuRegister output = out.AsRegister<CpuRegister>();
3250 NearLabel done, nan;
3251
3252 codegen_->Load64BitValue(output, kPrimLongMax);
3253 // if input >= (float)LONG_MAX goto done
3254 __ comiss(input, codegen_->LiteralFloatAddress(static_cast<float>(kPrimLongMax)));
3255 __ j(kAboveEqual, &done);
3256 // if input == NaN goto nan
3257 __ j(kUnordered, &nan);
3258 // output = float-to-long-truncate(input)
3259 __ cvttss2si(output, input, true);
3260 __ jmp(&done);
3261 __ Bind(&nan);
3262 // output = 0
3263 __ xorl(output, output);
3264 __ Bind(&done);
3265 break;
3266 }
3267
3268 case DataType::Type::kFloat64: {
3269 XmmRegister input = in.AsFpuRegister<XmmRegister>();
3270 CpuRegister output = out.AsRegister<CpuRegister>();
3271 NearLabel done, nan;
3272
3273 codegen_->Load64BitValue(output, kPrimLongMax);
3274 // if input >= (double)LONG_MAX goto done
3275 __ comisd(input, codegen_->LiteralDoubleAddress(
3276 static_cast<double>(kPrimLongMax)));
3277 __ j(kAboveEqual, &done);
3278 // if input == NaN goto nan
3279 __ j(kUnordered, &nan);
3280 // output = double-to-long-truncate(input)
3281 __ cvttsd2si(output, input, true);
3282 __ jmp(&done);
3283 __ Bind(&nan);
3284 // output = 0
3285 __ xorl(output, output);
3286 __ Bind(&done);
3287 break;
3288 }
3289
3290 default:
3291 LOG(FATAL) << "Unexpected type conversion from " << input_type
3292 << " to " << result_type;
3293 }
3294 break;
3295
3296 case DataType::Type::kFloat32:
3297 switch (input_type) {
3298 case DataType::Type::kBool:
3299 case DataType::Type::kUint8:
3300 case DataType::Type::kInt8:
3301 case DataType::Type::kUint16:
3302 case DataType::Type::kInt16:
3303 case DataType::Type::kInt32:
3304 if (in.IsRegister()) {
3305 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
3306 } else if (in.IsConstant()) {
3307 int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
3308 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3309 codegen_->Load32BitValue(dest, static_cast<float>(v));
3310 } else {
3311 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
3312 Address(CpuRegister(RSP), in.GetStackIndex()), false);
3313 }
3314 break;
3315
3316 case DataType::Type::kInt64:
3317 if (in.IsRegister()) {
3318 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
3319 } else if (in.IsConstant()) {
3320 int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
3321 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3322 codegen_->Load32BitValue(dest, static_cast<float>(v));
3323 } else {
3324 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
3325 Address(CpuRegister(RSP), in.GetStackIndex()), true);
3326 }
3327 break;
3328
3329 case DataType::Type::kFloat64:
3330 if (in.IsFpuRegister()) {
3331 __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3332 } else if (in.IsConstant()) {
3333 double v = in.GetConstant()->AsDoubleConstant()->GetValue();
3334 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3335 codegen_->Load32BitValue(dest, static_cast<float>(v));
3336 } else {
3337 __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(),
3338 Address(CpuRegister(RSP), in.GetStackIndex()));
3339 }
3340 break;
3341
3342 default:
3343 LOG(FATAL) << "Unexpected type conversion from " << input_type
3344 << " to " << result_type;
3345 }
3346 break;
3347
3348 case DataType::Type::kFloat64:
3349 switch (input_type) {
3350 case DataType::Type::kBool:
3351 case DataType::Type::kUint8:
3352 case DataType::Type::kInt8:
3353 case DataType::Type::kUint16:
3354 case DataType::Type::kInt16:
3355 case DataType::Type::kInt32:
3356 if (in.IsRegister()) {
3357 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
3358 } else if (in.IsConstant()) {
3359 int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
3360 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3361 codegen_->Load64BitValue(dest, static_cast<double>(v));
3362 } else {
3363 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
3364 Address(CpuRegister(RSP), in.GetStackIndex()), false);
3365 }
3366 break;
3367
3368 case DataType::Type::kInt64:
3369 if (in.IsRegister()) {
3370 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
3371 } else if (in.IsConstant()) {
3372 int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
3373 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3374 codegen_->Load64BitValue(dest, static_cast<double>(v));
3375 } else {
3376 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
3377 Address(CpuRegister(RSP), in.GetStackIndex()), true);
3378 }
3379 break;
3380
3381 case DataType::Type::kFloat32:
3382 if (in.IsFpuRegister()) {
3383 __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3384 } else if (in.IsConstant()) {
3385 float v = in.GetConstant()->AsFloatConstant()->GetValue();
3386 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3387 codegen_->Load64BitValue(dest, static_cast<double>(v));
3388 } else {
3389 __ cvtss2sd(out.AsFpuRegister<XmmRegister>(),
3390 Address(CpuRegister(RSP), in.GetStackIndex()));
3391 }
3392 break;
3393
3394 default:
3395 LOG(FATAL) << "Unexpected type conversion from " << input_type
3396 << " to " << result_type;
3397 }
3398 break;
3399
3400 default:
3401 LOG(FATAL) << "Unexpected type conversion from " << input_type
3402 << " to " << result_type;
3403 }
3404 }
3405
3406 void LocationsBuilderX86_64::VisitAdd(HAdd* add) {
3407 LocationSummary* locations =
3408 new (GetGraph()->GetAllocator()) LocationSummary(add, LocationSummary::kNoCall);
3409 switch (add->GetResultType()) {
3410 case DataType::Type::kInt32: {
3411 locations->SetInAt(0, Location::RequiresRegister());
3412 locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
3413 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3414 break;
3415 }
3416
3417 case DataType::Type::kInt64: {
3418 locations->SetInAt(0, Location::RequiresRegister());
3419 // We can use a leaq or addq if the constant can fit in an immediate.
3420 locations->SetInAt(1, Location::RegisterOrInt32Constant(add->InputAt(1)));
3421 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3422 break;
3423 }
3424
3425 case DataType::Type::kFloat64:
3426 case DataType::Type::kFloat32: {
3427 locations->SetInAt(0, Location::RequiresFpuRegister());
3428 locations->SetInAt(1, Location::Any());
3429 locations->SetOut(Location::SameAsFirstInput());
3430 break;
3431 }
3432
3433 default:
3434 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3435 }
3436 }
3437
3438 void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) {
3439 LocationSummary* locations = add->GetLocations();
3440 Location first = locations->InAt(0);
3441 Location second = locations->InAt(1);
3442 Location out = locations->Out();
3443
3444 switch (add->GetResultType()) {
3445 case DataType::Type::kInt32: {
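// Use addl when the output aliases one of the inputs; otherwise use leal, which acts
// as a non-destructive three-operand add and also handles the register + constant
// case without clobbering the input register.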
3446 if (second.IsRegister()) {
3447 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3448 __ addl(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3449 } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3450 __ addl(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
3451 } else {
3452 __ leal(out.AsRegister<CpuRegister>(), Address(
3453 first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
3454 }
3455 } else if (second.IsConstant()) {
3456 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3457 __ addl(out.AsRegister<CpuRegister>(),
3458 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
3459 } else {
3460 __ leal(out.AsRegister<CpuRegister>(), Address(
3461 first.AsRegister<CpuRegister>(), second.GetConstant()->AsIntConstant()->GetValue()));
3462 }
3463 } else {
3464 DCHECK(first.Equals(locations->Out()));
3465 __ addl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
3466 }
3467 break;
3468 }
3469
3470 case DataType::Type::kInt64: {
3471 if (second.IsRegister()) {
3472 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3473 __ addq(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3474 } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3475 __ addq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
3476 } else {
3477 __ leaq(out.AsRegister<CpuRegister>(), Address(
3478 first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
3479 }
3480 } else {
3481 DCHECK(second.IsConstant());
3482 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3483 int32_t int32_value = Low32Bits(value);
3484 DCHECK_EQ(int32_value, value);
3485 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3486 __ addq(out.AsRegister<CpuRegister>(), Immediate(int32_value));
3487 } else {
3488 __ leaq(out.AsRegister<CpuRegister>(), Address(
3489 first.AsRegister<CpuRegister>(), int32_value));
3490 }
3491 }
3492 break;
3493 }
3494
3495 case DataType::Type::kFloat32: {
3496 if (second.IsFpuRegister()) {
3497 __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3498 } else if (second.IsConstant()) {
3499 __ addss(first.AsFpuRegister<XmmRegister>(),
3500 codegen_->LiteralFloatAddress(
3501 second.GetConstant()->AsFloatConstant()->GetValue()));
3502 } else {
3503 DCHECK(second.IsStackSlot());
3504 __ addss(first.AsFpuRegister<XmmRegister>(),
3505 Address(CpuRegister(RSP), second.GetStackIndex()));
3506 }
3507 break;
3508 }
3509
3510 case DataType::Type::kFloat64: {
3511 if (second.IsFpuRegister()) {
3512 __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3513 } else if (second.IsConstant()) {
3514 __ addsd(first.AsFpuRegister<XmmRegister>(),
3515 codegen_->LiteralDoubleAddress(
3516 second.GetConstant()->AsDoubleConstant()->GetValue()));
3517 } else {
3518 DCHECK(second.IsDoubleStackSlot());
3519 __ addsd(first.AsFpuRegister<XmmRegister>(),
3520 Address(CpuRegister(RSP), second.GetStackIndex()));
3521 }
3522 break;
3523 }
3524
3525 default:
3526 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3527 }
3528 }
3529
3530 void LocationsBuilderX86_64::VisitSub(HSub* sub) {
3531 LocationSummary* locations =
3532 new (GetGraph()->GetAllocator()) LocationSummary(sub, LocationSummary::kNoCall);
3533 switch (sub->GetResultType()) {
3534 case DataType::Type::kInt32: {
3535 locations->SetInAt(0, Location::RequiresRegister());
3536 locations->SetInAt(1, Location::Any());
3537 locations->SetOut(Location::SameAsFirstInput());
3538 break;
3539 }
3540 case DataType::Type::kInt64: {
3541 locations->SetInAt(0, Location::RequiresRegister());
3542 locations->SetInAt(1, Location::RegisterOrInt32Constant(sub->InputAt(1)));
3543 locations->SetOut(Location::SameAsFirstInput());
3544 break;
3545 }
3546 case DataType::Type::kFloat32:
3547 case DataType::Type::kFloat64: {
3548 locations->SetInAt(0, Location::RequiresFpuRegister());
3549 locations->SetInAt(1, Location::Any());
3550 locations->SetOut(Location::SameAsFirstInput());
3551 break;
3552 }
3553 default:
3554 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3555 }
3556 }
3557
3558 void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) {
3559 LocationSummary* locations = sub->GetLocations();
3560 Location first = locations->InAt(0);
3561 Location second = locations->InAt(1);
3562 DCHECK(first.Equals(locations->Out()));
3563 switch (sub->GetResultType()) {
3564 case DataType::Type::kInt32: {
3565 if (second.IsRegister()) {
3566 __ subl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3567 } else if (second.IsConstant()) {
3568 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
3569 __ subl(first.AsRegister<CpuRegister>(), imm);
3570 } else {
3571 __ subl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
3572 }
3573 break;
3574 }
3575 case DataType::Type::kInt64: {
3576 if (second.IsConstant()) {
3577 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3578 DCHECK(IsInt<32>(value));
3579 __ subq(first.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
3580 } else {
3581 __ subq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3582 }
3583 break;
3584 }
3585
3586 case DataType::Type::kFloat32: {
3587 if (second.IsFpuRegister()) {
3588 __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3589 } else if (second.IsConstant()) {
3590 __ subss(first.AsFpuRegister<XmmRegister>(),
3591 codegen_->LiteralFloatAddress(
3592 second.GetConstant()->AsFloatConstant()->GetValue()));
3593 } else {
3594 DCHECK(second.IsStackSlot());
3595 __ subss(first.AsFpuRegister<XmmRegister>(),
3596 Address(CpuRegister(RSP), second.GetStackIndex()));
3597 }
3598 break;
3599 }
3600
3601 case DataType::Type::kFloat64: {
3602 if (second.IsFpuRegister()) {
3603 __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3604 } else if (second.IsConstant()) {
3605 __ subsd(first.AsFpuRegister<XmmRegister>(),
3606 codegen_->LiteralDoubleAddress(
3607 second.GetConstant()->AsDoubleConstant()->GetValue()));
3608 } else {
3609 DCHECK(second.IsDoubleStackSlot());
3610 __ subsd(first.AsFpuRegister<XmmRegister>(),
3611 Address(CpuRegister(RSP), second.GetStackIndex()));
3612 }
3613 break;
3614 }
3615
3616 default:
3617 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3618 }
3619 }
3620
3621 void LocationsBuilderX86_64::VisitMul(HMul* mul) {
3622 LocationSummary* locations =
3623 new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
3624 switch (mul->GetResultType()) {
3625 case DataType::Type::kInt32: {
3626 locations->SetInAt(0, Location::RequiresRegister());
3627 locations->SetInAt(1, Location::Any());
3628 if (mul->InputAt(1)->IsIntConstant()) {
3629 // Can use 3 operand multiply.
3630 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3631 } else {
3632 locations->SetOut(Location::SameAsFirstInput());
3633 }
3634 break;
3635 }
3636 case DataType::Type::kInt64: {
3637 locations->SetInAt(0, Location::RequiresRegister());
3638 locations->SetInAt(1, Location::Any());
3639 if (mul->InputAt(1)->IsLongConstant() &&
3640 IsInt<32>(mul->InputAt(1)->AsLongConstant()->GetValue())) {
3641 // Can use 3 operand multiply.
3642 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3643 } else {
3644 locations->SetOut(Location::SameAsFirstInput());
3645 }
3646 break;
3647 }
3648 case DataType::Type::kFloat32:
3649 case DataType::Type::kFloat64: {
3650 locations->SetInAt(0, Location::RequiresFpuRegister());
3651 locations->SetInAt(1, Location::Any());
3652 locations->SetOut(Location::SameAsFirstInput());
3653 break;
3654 }
3655
3656 default:
3657 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3658 }
3659 }
3660
3661 void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) {
3662 LocationSummary* locations = mul->GetLocations();
3663 Location first = locations->InAt(0);
3664 Location second = locations->InAt(1);
3665 Location out = locations->Out();
3666 switch (mul->GetResultType()) {
3667 case DataType::Type::kInt32:
3668 // The constant may have ended up in a register, so test explicitly to avoid
3669 // problems where the output may not be the same as the first operand.
3670 if (mul->InputAt(1)->IsIntConstant()) {
3671 Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
3672 __ imull(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), imm);
3673 } else if (second.IsRegister()) {
3674 DCHECK(first.Equals(out));
3675 __ imull(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3676 } else {
3677 DCHECK(first.Equals(out));
3678 DCHECK(second.IsStackSlot());
3679 __ imull(first.AsRegister<CpuRegister>(),
3680 Address(CpuRegister(RSP), second.GetStackIndex()));
3681 }
3682 break;
3683 case DataType::Type::kInt64: {
3684 // The constant may have ended up in a register, so test explicitly to avoid
3685 // problems where the output may not be the same as the first operand.
3686 if (mul->InputAt(1)->IsLongConstant()) {
3687 int64_t value = mul->InputAt(1)->AsLongConstant()->GetValue();
3688 if (IsInt<32>(value)) {
3689 __ imulq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(),
3690 Immediate(static_cast<int32_t>(value)));
3691 } else {
3692 // Have to use the constant area.
3693 DCHECK(first.Equals(out));
3694 __ imulq(first.AsRegister<CpuRegister>(), codegen_->LiteralInt64Address(value));
3695 }
3696 } else if (second.IsRegister()) {
3697 DCHECK(first.Equals(out));
3698 __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3699 } else {
3700 DCHECK(second.IsDoubleStackSlot());
3701 DCHECK(first.Equals(out));
3702 __ imulq(first.AsRegister<CpuRegister>(),
3703 Address(CpuRegister(RSP), second.GetStackIndex()));
3704 }
3705 break;
3706 }
3707
3708 case DataType::Type::kFloat32: {
3709 DCHECK(first.Equals(out));
3710 if (second.IsFpuRegister()) {
3711 __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3712 } else if (second.IsConstant()) {
3713 __ mulss(first.AsFpuRegister<XmmRegister>(),
3714 codegen_->LiteralFloatAddress(
3715 second.GetConstant()->AsFloatConstant()->GetValue()));
3716 } else {
3717 DCHECK(second.IsStackSlot());
3718 __ mulss(first.AsFpuRegister<XmmRegister>(),
3719 Address(CpuRegister(RSP), second.GetStackIndex()));
3720 }
3721 break;
3722 }
3723
3724 case DataType::Type::kFloat64: {
3725 DCHECK(first.Equals(out));
3726 if (second.IsFpuRegister()) {
3727 __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3728 } else if (second.IsConstant()) {
3729 __ mulsd(first.AsFpuRegister<XmmRegister>(),
3730 codegen_->LiteralDoubleAddress(
3731 second.GetConstant()->AsDoubleConstant()->GetValue()));
3732 } else {
3733 DCHECK(second.IsDoubleStackSlot());
3734 __ mulsd(first.AsFpuRegister<XmmRegister>(),
3735 Address(CpuRegister(RSP), second.GetStackIndex()));
3736 }
3737 break;
3738 }
3739
3740 default:
3741 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3742 }
3743 }
3744
3745 void InstructionCodeGeneratorX86_64::PushOntoFPStack(Location source, uint32_t temp_offset,
3746 uint32_t stack_adjustment, bool is_float) {
3747 if (source.IsStackSlot()) {
3748 DCHECK(is_float);
3749 __ flds(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
3750 } else if (source.IsDoubleStackSlot()) {
3751 DCHECK(!is_float);
3752 __ fldl(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
3753 } else {
3754 // Write the value to the temporary location on the stack and load to FP stack.
3755 if (is_float) {
3756 Location stack_temp = Location::StackSlot(temp_offset);
3757 codegen_->Move(stack_temp, source);
3758 __ flds(Address(CpuRegister(RSP), temp_offset));
3759 } else {
3760 Location stack_temp = Location::DoubleStackSlot(temp_offset);
3761 codegen_->Move(stack_temp, source);
3762 __ fldl(Address(CpuRegister(RSP), temp_offset));
3763 }
3764 }
3765 }
3766
3767 void InstructionCodeGeneratorX86_64::GenerateRemFP(HRem* rem) {
3768 DataType::Type type = rem->GetResultType();
3769 bool is_float = type == DataType::Type::kFloat32;
3770 size_t elem_size = DataType::Size(type);
3771 LocationSummary* locations = rem->GetLocations();
3772 Location first = locations->InAt(0);
3773 Location second = locations->InAt(1);
3774 Location out = locations->Out();
3775
3776 // Create stack space for 2 elements.
3777 // TODO: enhance register allocator to ask for stack temporaries.
3778 __ subq(CpuRegister(RSP), Immediate(2 * elem_size));
3779
3780 // Load the values to the FP stack in reverse order, using temporaries if needed.
3781 PushOntoFPStack(second, elem_size, 2 * elem_size, is_float);
3782 PushOntoFPStack(first, 0, 2 * elem_size, is_float);
3783
3784 // Loop doing FPREM until we stabilize.
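// fprem only computes a partial remainder: each iteration reduces the exponent
// difference by at most 63 and sets the C2 status flag while the reduction is
// incomplete. Note that fprem (truncating, fmod-style) rather than fprem1 (IEEE
// round-to-nearest remainder) is what matches Java's % semantics.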
3785 NearLabel retry;
3786 __ Bind(&retry);
3787 __ fprem();
3788
3789 // Move FP status to AX.
3790 __ fstsw();
3791
3792 // And see if the argument reduction is complete. This is signaled by the
3793 // C2 FPU flag bit set to 0.
3794 __ andl(CpuRegister(RAX), Immediate(kC2ConditionMask));
3795 __ j(kNotEqual, &retry);
3796
3797 // We have settled on the final value. Retrieve it into an XMM register.
3798 // Store FP top of stack to real stack.
3799 if (is_float) {
3800 __ fsts(Address(CpuRegister(RSP), 0));
3801 } else {
3802 __ fstl(Address(CpuRegister(RSP), 0));
3803 }
3804
3805 // Pop the 2 items from the FP stack.
3806 __ fucompp();
3807
3808 // Load the value from the stack into an XMM register.
3809 DCHECK(out.IsFpuRegister()) << out;
3810 if (is_float) {
3811 __ movss(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
3812 } else {
3813 __ movsd(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
3814 }
3815
3816 // And remove the temporary stack space we allocated.
3817 __ addq(CpuRegister(RSP), Immediate(2 * elem_size));
3818 }
3819
3820 void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
3821 DCHECK(instruction->IsDiv() || instruction->IsRem());
3822
3823 LocationSummary* locations = instruction->GetLocations();
3824 Location second = locations->InAt(1);
3825 DCHECK(second.IsConstant());
3826
3827 CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
3828 CpuRegister input_register = locations->InAt(0).AsRegister<CpuRegister>();
3829 int64_t imm = Int64FromConstant(second.GetConstant());
3830
3831 DCHECK(imm == 1 || imm == -1);
3832
3833 switch (instruction->GetResultType()) {
3834 case DataType::Type::kInt32: {
3835 if (instruction->IsRem()) {
3836 __ xorl(output_register, output_register);
3837 } else {
3838 __ movl(output_register, input_register);
3839 if (imm == -1) {
3840 __ negl(output_register);
3841 }
3842 }
3843 break;
3844 }
3845
3846 case DataType::Type::kInt64: {
3847 if (instruction->IsRem()) {
3848 __ xorl(output_register, output_register);
3849 } else {
3850 __ movq(output_register, input_register);
3851 if (imm == -1) {
3852 __ negq(output_register);
3853 }
3854 }
3855 break;
3856 }
3857
3858 default:
3859 LOG(FATAL) << "Unexpected type for div by (-)1 " << instruction->GetResultType();
3860 }
3861 }
3862 void InstructionCodeGeneratorX86_64::RemByPowerOfTwo(HRem* instruction) {
3863 LocationSummary* locations = instruction->GetLocations();
3864 Location second = locations->InAt(1);
3865 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3866 CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
3867 int64_t imm = Int64FromConstant(second.GetConstant());
3868 DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
3869 uint64_t abs_imm = AbsOrMin(imm);
3870 CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
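// out = numerator & (abs_imm - 1) is the non-negative (floored) remainder; when the
// numerator is negative and that remainder is non-zero, subtracting abs_imm (the
// 64-bit path achieves the same by OR-ing in the sign-extension bits) turns it into
// the truncated remainder Java requires. E.g. -7 % 4: (-7 & 3) = 1, then 1 - 4 = -3.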
3871 if (instruction->GetResultType() == DataType::Type::kInt32) {
3872 NearLabel done;
3873 __ movl(out, numerator);
3874 __ andl(out, Immediate(abs_imm-1));
3875 __ j(Condition::kZero, &done);
3876 __ leal(tmp, Address(out, static_cast<int32_t>(~(abs_imm-1))));
3877 __ testl(numerator, numerator);
3878 __ cmov(Condition::kLess, out, tmp, false);
3879 __ Bind(&done);
3880
3881 } else {
3882 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
3883 codegen_->Load64BitValue(tmp, abs_imm - 1);
3884 NearLabel done;
3885
3886 __ movq(out, numerator);
3887 __ andq(out, tmp);
3888 __ j(Condition::kZero, &done);
3889 __ movq(tmp, numerator);
3890 __ sarq(tmp, Immediate(63));
3891 __ shlq(tmp, Immediate(WhichPowerOf2(abs_imm)));
3892 __ orq(out, tmp);
3893 __ Bind(&done);
3894 }
3895 }
3896 void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
3897 LocationSummary* locations = instruction->GetLocations();
3898 Location second = locations->InAt(1);
3899
3900 CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
3901 CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
3902
3903 int64_t imm = Int64FromConstant(second.GetConstant());
3904 DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
3905 uint64_t abs_imm = AbsOrMin(imm);
3906
3907 CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
3908
3909 if (instruction->GetResultType() == DataType::Type::kInt32) {
3910 // When the denominator is 2, we can simply add the sign bit to the numerator.
3911 // Using addl below instead of cmov gives us a 1-cycle benefit.
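// In both cases the bias makes the arithmetic shift round toward zero instead of
// toward negative infinity. E.g. -7 / 2: (-7 + 1) >> 1 = -3, and -7 / 4:
// (-7 + 3) >> 2 = -1, matching Java's truncated division.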
3912 if (abs_imm == 2) {
3913 __ leal(tmp, Address(numerator, 0));
3914 __ shrl(tmp, Immediate(31));
3915 __ addl(tmp, numerator);
3916 } else {
3917 __ leal(tmp, Address(numerator, abs_imm - 1));
3918 __ testl(numerator, numerator);
3919 __ cmov(kGreaterEqual, tmp, numerator);
3920 }
3921 int shift = CTZ(imm);
3922 __ sarl(tmp, Immediate(shift));
3923
3924 if (imm < 0) {
3925 __ negl(tmp);
3926 }
3927
3928 __ movl(output_register, tmp);
3929 } else {
3930 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
3931 CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
3932 if (abs_imm == 2) {
3933 __ movq(rdx, numerator);
3934 __ shrq(rdx, Immediate(63));
3935 __ addq(rdx, numerator);
3936 } else {
3937 codegen_->Load64BitValue(rdx, abs_imm - 1);
3938 __ addq(rdx, numerator);
3939 __ testq(numerator, numerator);
3940 __ cmov(kGreaterEqual, rdx, numerator);
3941 }
3942 int shift = CTZ(imm);
3943 __ sarq(rdx, Immediate(shift));
3944
3945 if (imm < 0) {
3946 __ negq(rdx);
3947 }
3948
3949 __ movq(output_register, rdx);
3950 }
3951 }
3952
3953 void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
3954 DCHECK(instruction->IsDiv() || instruction->IsRem());
3955
3956 LocationSummary* locations = instruction->GetLocations();
3957 Location second = locations->InAt(1);
3958
3959 CpuRegister numerator = instruction->IsDiv() ? locations->GetTemp(1).AsRegister<CpuRegister>()
3960 : locations->GetTemp(0).AsRegister<CpuRegister>();
3961 CpuRegister eax = locations->InAt(0).AsRegister<CpuRegister>();
3962 CpuRegister edx = instruction->IsDiv() ? locations->GetTemp(0).AsRegister<CpuRegister>()
3963 : locations->Out().AsRegister<CpuRegister>();
3964 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3965
3966 DCHECK_EQ(RAX, eax.AsRegister());
3967 DCHECK_EQ(RDX, edx.AsRegister());
3968 if (instruction->IsDiv()) {
3969 DCHECK_EQ(RAX, out.AsRegister());
3970 } else {
3971 DCHECK_EQ(RDX, out.AsRegister());
3972 }
3973
3974 int64_t magic;
3975 int shift;
3976
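// Both branches implement magic-number division (Hacker's Delight, ch. 10):
// q = high_half(magic * n), corrected by +/- n when magic and the divisor have
// opposite signs, then arithmetically shifted and adjusted by the shifted-out sign
// bit so the result rounds toward zero.
// Worked example with the canonical constants for divisor 7 (magic 0x92492493, shift 2):
//   n = 21:  high32(magic * 21)  = -9,  -9 + 21 = 12,  12 >> 2 = 3,   3 + 0 = 3.
//   n = -21: high32(magic * -21) =  8,   8 - 21 = -13, -13 >> 2 = -4, -4 + 1 = -3.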
3977 // TODO: can these branches be written as one?
3978 if (instruction->GetResultType() == DataType::Type::kInt32) {
3979 int imm = second.GetConstant()->AsIntConstant()->GetValue();
3980
3981 CalculateMagicAndShiftForDivRem(imm, false /* is_long= */, &magic, &shift);
3982
3983 __ movl(numerator, eax);
3984
3985 __ movl(eax, Immediate(magic));
3986 __ imull(numerator);
3987
3988 if (imm > 0 && magic < 0) {
3989 __ addl(edx, numerator);
3990 } else if (imm < 0 && magic > 0) {
3991 __ subl(edx, numerator);
3992 }
3993
3994 if (shift != 0) {
3995 __ sarl(edx, Immediate(shift));
3996 }
3997
3998 __ movl(eax, edx);
3999 __ shrl(edx, Immediate(31));
4000 __ addl(edx, eax);
4001
4002 if (instruction->IsRem()) {
4003 __ movl(eax, numerator);
4004 __ imull(edx, Immediate(imm));
4005 __ subl(eax, edx);
4006 __ movl(edx, eax);
4007 } else {
4008 __ movl(eax, edx);
4009 }
4010 } else {
4011 int64_t imm = second.GetConstant()->AsLongConstant()->GetValue();
4012
4013 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
4014
4015 CpuRegister rax = eax;
4016 CpuRegister rdx = edx;
4017
4018 CalculateMagicAndShiftForDivRem(imm, true /* is_long= */, &magic, &shift);
4019
4020 // Save the numerator.
4021 __ movq(numerator, rax);
4022
4023 // RAX = magic
4024 codegen_->Load64BitValue(rax, magic);
4025
4026 // RDX:RAX = magic * numerator
4027 __ imulq(numerator);
4028
4029 if (imm > 0 && magic < 0) {
4030 // RDX += numerator
4031 __ addq(rdx, numerator);
4032 } else if (imm < 0 && magic > 0) {
4033 // RDX -= numerator
4034 __ subq(rdx, numerator);
4035 }
4036
4037 // Shift if needed.
4038 if (shift != 0) {
4039 __ sarq(rdx, Immediate(shift));
4040 }
4041
4042 // RDX += 1 if RDX < 0
4043 __ movq(rax, rdx);
4044 __ shrq(rdx, Immediate(63));
4045 __ addq(rdx, rax);
4046
4047 if (instruction->IsRem()) {
4048 __ movq(rax, numerator);
4049
4050 if (IsInt<32>(imm)) {
4051 __ imulq(rdx, Immediate(static_cast<int32_t>(imm)));
4052 } else {
4053 __ imulq(rdx, codegen_->LiteralInt64Address(imm));
4054 }
4055
4056 __ subq(rax, rdx);
4057 __ movq(rdx, rax);
4058 } else {
4059 __ movq(rax, rdx);
4060 }
4061 }
4062 }
4063
4064 void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
4065 DCHECK(instruction->IsDiv() || instruction->IsRem());
4066 DataType::Type type = instruction->GetResultType();
4067 DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
4068
4069 bool is_div = instruction->IsDiv();
4070 LocationSummary* locations = instruction->GetLocations();
4071
4072 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4073 Location second = locations->InAt(1);
4074
4075 DCHECK_EQ(RAX, locations->InAt(0).AsRegister<CpuRegister>().AsRegister());
4076 DCHECK_EQ(is_div ? RAX : RDX, out.AsRegister());
4077
4078 if (second.IsConstant()) {
4079 int64_t imm = Int64FromConstant(second.GetConstant());
4080
4081 if (imm == 0) {
4082 // Do not generate anything. DivZeroCheck would prevent any code from being executed.
4083 } else if (imm == 1 || imm == -1) {
4084 DivRemOneOrMinusOne(instruction);
4085 } else if (IsPowerOfTwo(AbsOrMin(imm))) {
4086 if (is_div) {
4087 DivByPowerOfTwo(instruction->AsDiv());
4088 } else {
4089 RemByPowerOfTwo(instruction->AsRem());
4090 }
4091 } else {
4092 DCHECK(imm <= -2 || imm >= 2);
4093 GenerateDivRemWithAnyConstant(instruction);
4094 }
4095 } else {
4096 SlowPathCode* slow_path =
4097 new (codegen_->GetScopedAllocator()) DivRemMinusOneSlowPathX86_64(
4098 instruction, out.AsRegister(), type, is_div);
4099 codegen_->AddSlowPath(slow_path);
4100
4101 CpuRegister second_reg = second.AsRegister<CpuRegister>();
4102 // 0x80000000(00000000)/-1 triggers an arithmetic exception!
4103 // Dividing by -1 is actually negation and -0x80000000(00000000) = 0x80000000(00000000),
4104 // so it's safe to just use negl/negq instead of more complex comparisons.
4105 if (type == DataType::Type::kInt32) {
4106 __ cmpl(second_reg, Immediate(-1));
4107 __ j(kEqual, slow_path->GetEntryLabel());
4108 // edx:eax <- sign-extended of eax
4109 __ cdq();
4110 // eax = quotient, edx = remainder
4111 __ idivl(second_reg);
4112 } else {
4113 __ cmpq(second_reg, Immediate(-1));
4114 __ j(kEqual, slow_path->GetEntryLabel());
4115 // rdx:rax <- sign-extended of rax
4116 __ cqo();
4117 // rax = quotient, rdx = remainder
4118 __ idivq(second_reg);
4119 }
4120 __ Bind(slow_path->GetExitLabel());
4121 }
4122 }
4123
4124 void LocationsBuilderX86_64::VisitDiv(HDiv* div) {
4125 LocationSummary* locations =
4126 new (GetGraph()->GetAllocator()) LocationSummary(div, LocationSummary::kNoCall);
4127 switch (div->GetResultType()) {
4128 case DataType::Type::kInt32:
4129 case DataType::Type::kInt64: {
4130 locations->SetInAt(0, Location::RegisterLocation(RAX));
4131 locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
4132 locations->SetOut(Location::SameAsFirstInput());
4133 // The idiv instruction uses edx:eax (rdx:rax for 64-bit operands) as the dividend.
4134 locations->AddTemp(Location::RegisterLocation(RDX));
4135 // We need to save the numerator while we tweak rax and rdx. As we are using imul in a way
4136 // which enforces results to be in RAX and RDX, things are simpler if we use RDX also as
4137 // output and request another temp.
4138 if (div->InputAt(1)->IsConstant()) {
4139 locations->AddTemp(Location::RequiresRegister());
4140 }
4141 break;
4142 }
4143
4144 case DataType::Type::kFloat32:
4145 case DataType::Type::kFloat64: {
4146 locations->SetInAt(0, Location::RequiresFpuRegister());
4147 locations->SetInAt(1, Location::Any());
4148 locations->SetOut(Location::SameAsFirstInput());
4149 break;
4150 }
4151
4152 default:
4153 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4154 }
4155 }
4156
4157 void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) {
4158 LocationSummary* locations = div->GetLocations();
4159 Location first = locations->InAt(0);
4160 Location second = locations->InAt(1);
4161 DCHECK(first.Equals(locations->Out()));
4162
4163 DataType::Type type = div->GetResultType();
4164 switch (type) {
4165 case DataType::Type::kInt32:
4166 case DataType::Type::kInt64: {
4167 GenerateDivRemIntegral(div);
4168 break;
4169 }
4170
4171 case DataType::Type::kFloat32: {
4172 if (second.IsFpuRegister()) {
4173 __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4174 } else if (second.IsConstant()) {
4175 __ divss(first.AsFpuRegister<XmmRegister>(),
4176 codegen_->LiteralFloatAddress(
4177 second.GetConstant()->AsFloatConstant()->GetValue()));
4178 } else {
4179 DCHECK(second.IsStackSlot());
4180 __ divss(first.AsFpuRegister<XmmRegister>(),
4181 Address(CpuRegister(RSP), second.GetStackIndex()));
4182 }
4183 break;
4184 }
4185
4186 case DataType::Type::kFloat64: {
4187 if (second.IsFpuRegister()) {
4188 __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4189 } else if (second.IsConstant()) {
4190 __ divsd(first.AsFpuRegister<XmmRegister>(),
4191 codegen_->LiteralDoubleAddress(
4192 second.GetConstant()->AsDoubleConstant()->GetValue()));
4193 } else {
4194 DCHECK(second.IsDoubleStackSlot());
4195 __ divsd(first.AsFpuRegister<XmmRegister>(),
4196 Address(CpuRegister(RSP), second.GetStackIndex()));
4197 }
4198 break;
4199 }
4200
4201 default:
4202 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4203 }
4204 }
4205
4206 void LocationsBuilderX86_64::VisitRem(HRem* rem) {
4207 DataType::Type type = rem->GetResultType();
4208 LocationSummary* locations =
4209 new (GetGraph()->GetAllocator()) LocationSummary(rem, LocationSummary::kNoCall);
4210
4211 switch (type) {
4212 case DataType::Type::kInt32:
4213 case DataType::Type::kInt64: {
4214 locations->SetInAt(0, Location::RegisterLocation(RAX));
4215 locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
4216       // Intel uses rdx:rax as the dividend and puts the remainder in rdx.
4217 locations->SetOut(Location::RegisterLocation(RDX));
4218 // We need to save the numerator while we tweak eax and edx. As we are using imul in a way
4219 // which enforces results to be in RAX and RDX, things are simpler if we use EAX also as
4220 // output and request another temp.
4221 if (rem->InputAt(1)->IsConstant()) {
4222 locations->AddTemp(Location::RequiresRegister());
4223 }
4224 break;
4225 }
4226
4227 case DataType::Type::kFloat32:
4228 case DataType::Type::kFloat64: {
4229 locations->SetInAt(0, Location::Any());
4230 locations->SetInAt(1, Location::Any());
4231 locations->SetOut(Location::RequiresFpuRegister());
4232 locations->AddTemp(Location::RegisterLocation(RAX));
4233 break;
4234 }
4235
4236 default:
4237 LOG(FATAL) << "Unexpected rem type " << type;
4238 }
4239 }
4240
4241 void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) {
4242 DataType::Type type = rem->GetResultType();
4243 switch (type) {
4244 case DataType::Type::kInt32:
4245 case DataType::Type::kInt64: {
4246 GenerateDivRemIntegral(rem);
4247 break;
4248 }
4249 case DataType::Type::kFloat32:
4250 case DataType::Type::kFloat64: {
4251 GenerateRemFP(rem);
4252 break;
4253 }
4254 default:
4255 LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
4256 }
4257 }
4258
4259 static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
4260 LocationSummary* locations = new (allocator) LocationSummary(minmax);
4261 switch (minmax->GetResultType()) {
4262 case DataType::Type::kInt32:
4263 case DataType::Type::kInt64:
4264 locations->SetInAt(0, Location::RequiresRegister());
4265 locations->SetInAt(1, Location::RequiresRegister());
4266 locations->SetOut(Location::SameAsFirstInput());
4267 break;
4268 case DataType::Type::kFloat32:
4269 case DataType::Type::kFloat64:
4270 locations->SetInAt(0, Location::RequiresFpuRegister());
4271 locations->SetInAt(1, Location::RequiresFpuRegister());
4272       // The following is sub-optimal, but it is all we can do for now. It would be fine to
4273       // also accept the second input as the output (we could simply swap the inputs).
4274 locations->SetOut(Location::SameAsFirstInput());
4275 break;
4276 default:
4277 LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
4278 }
4279 }
4280
4281 void InstructionCodeGeneratorX86_64::GenerateMinMaxInt(LocationSummary* locations,
4282 bool is_min,
4283 DataType::Type type) {
4284 Location op1_loc = locations->InAt(0);
4285 Location op2_loc = locations->InAt(1);
4286
4287 // Shortcut for same input locations.
4288 if (op1_loc.Equals(op2_loc)) {
4289 // Can return immediately, as op1_loc == out_loc.
4290 // Note: if we ever support separate registers, e.g., output into memory, we need to check for
4291 // a copy here.
4292 DCHECK(locations->Out().Equals(op1_loc));
4293 return;
4294 }
4295
4296 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4297 CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();
4298
4299 // (out := op1)
4300 // out <=? op2
4301 // if out is min jmp done
4302 // out := op2
4303 // done:
4304
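  // The cmov below overwrites `out` with `op2` exactly when `op2` should win the comparison:
  // for min when out > op2 (kGreater), for max when out < op2 (kLess); otherwise `out`
  // already holds op1, which is also the output location.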
4305 if (type == DataType::Type::kInt64) {
4306 __ cmpq(out, op2);
4307 __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ true);
4308 } else {
4309 DCHECK_EQ(type, DataType::Type::kInt32);
4310 __ cmpl(out, op2);
4311 __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ false);
4312 }
4313 }
4314
4315 void InstructionCodeGeneratorX86_64::GenerateMinMaxFP(LocationSummary* locations,
4316 bool is_min,
4317 DataType::Type type) {
4318 Location op1_loc = locations->InAt(0);
4319 Location op2_loc = locations->InAt(1);
4320 Location out_loc = locations->Out();
4321 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
4322
4323 // Shortcut for same input locations.
4324 if (op1_loc.Equals(op2_loc)) {
4325 DCHECK(out_loc.Equals(op1_loc));
4326 return;
4327 }
4328
4329 // (out := op1)
4330 // out <=? op2
4331 // if Nan jmp Nan_label
4332 // if out is min jmp done
4333 // if op2 is min jmp op2_label
4334 // handle -0/+0
4335 // jmp done
4336 // Nan_label:
4337 // out := NaN
4338 // op2_label:
4339 // out := op2
4340 // done:
4341 //
4342 // This removes one jmp, but needs to copy one input (op1) to out.
4343 //
4344 // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?
4345
4346 XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
4347
4348 NearLabel nan, done, op2_label;
4349 if (type == DataType::Type::kFloat64) {
4350 __ ucomisd(out, op2);
4351 } else {
4352 DCHECK_EQ(type, DataType::Type::kFloat32);
4353 __ ucomiss(out, op2);
4354 }
4355
4356 __ j(Condition::kParityEven, &nan);
4357
4358 __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
4359 __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
4360
4361 // Handle 0.0/-0.0.
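  // The IEEE-754 sign bit decides the tie between +0.0 and -0.0: OR-ing the operands keeps a
  // negative sign if either input is -0.0 (so min(+0.0, -0.0) == -0.0), while AND-ing clears
  // it unless both inputs are -0.0 (so max(+0.0, -0.0) == +0.0).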
4362 if (is_min) {
4363 if (type == DataType::Type::kFloat64) {
4364 __ orpd(out, op2);
4365 } else {
4366 __ orps(out, op2);
4367 }
4368 } else {
4369 if (type == DataType::Type::kFloat64) {
4370 __ andpd(out, op2);
4371 } else {
4372 __ andps(out, op2);
4373 }
4374 }
4375 __ jmp(&done);
4376
4377 // NaN handling.
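  // Load the canonical quiet NaN bit pattern (Double.doubleToLongBits(Double.NaN) /
  // Float.floatToIntBits(Float.NaN)), so min/max return NaN whenever either input is NaN.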
4378 __ Bind(&nan);
4379 if (type == DataType::Type::kFloat64) {
4380 __ movsd(out, codegen_->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
4381 } else {
4382 __ movss(out, codegen_->LiteralInt32Address(INT32_C(0x7FC00000)));
4383 }
4384 __ jmp(&done);
4385
4386 // out := op2;
4387 __ Bind(&op2_label);
4388 if (type == DataType::Type::kFloat64) {
4389 __ movsd(out, op2);
4390 } else {
4391 __ movss(out, op2);
4392 }
4393
4394 // Done.
4395 __ Bind(&done);
4396 }
4397
4398 void InstructionCodeGeneratorX86_64::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
4399 DataType::Type type = minmax->GetResultType();
4400 switch (type) {
4401 case DataType::Type::kInt32:
4402 case DataType::Type::kInt64:
4403 GenerateMinMaxInt(minmax->GetLocations(), is_min, type);
4404 break;
4405 case DataType::Type::kFloat32:
4406 case DataType::Type::kFloat64:
4407 GenerateMinMaxFP(minmax->GetLocations(), is_min, type);
4408 break;
4409 default:
4410 LOG(FATAL) << "Unexpected type for HMinMax " << type;
4411 }
4412 }
4413
4414 void LocationsBuilderX86_64::VisitMin(HMin* min) {
4415 CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
4416 }
4417
4418 void InstructionCodeGeneratorX86_64::VisitMin(HMin* min) {
4419 GenerateMinMax(min, /*is_min*/ true);
4420 }
4421
4422 void LocationsBuilderX86_64::VisitMax(HMax* max) {
4423 CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
4424 }
4425
4426 void InstructionCodeGeneratorX86_64::VisitMax(HMax* max) {
4427 GenerateMinMax(max, /*is_min*/ false);
4428 }
4429
4430 void LocationsBuilderX86_64::VisitAbs(HAbs* abs) {
4431 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
4432 switch (abs->GetResultType()) {
4433 case DataType::Type::kInt32:
4434 case DataType::Type::kInt64:
4435 locations->SetInAt(0, Location::RequiresRegister());
4436 locations->SetOut(Location::SameAsFirstInput());
4437 locations->AddTemp(Location::RequiresRegister());
4438 break;
4439 case DataType::Type::kFloat32:
4440 case DataType::Type::kFloat64:
4441 locations->SetInAt(0, Location::RequiresFpuRegister());
4442 locations->SetOut(Location::SameAsFirstInput());
4443 locations->AddTemp(Location::RequiresFpuRegister());
4444 break;
4445 default:
4446 LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4447 }
4448 }
4449
4450 void InstructionCodeGeneratorX86_64::VisitAbs(HAbs* abs) {
4451 LocationSummary* locations = abs->GetLocations();
4452 switch (abs->GetResultType()) {
4453 case DataType::Type::kInt32: {
4454 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4455 CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();
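      // Branch-free abs: mask = out >> 31 (arithmetic shift) is 0 for non-negative values and
      // -1 for negative ones, so (out + mask) ^ mask leaves non-negative values unchanged and
      // negates negative ones in two's complement. The kInt64 case below uses the same trick
      // with a 63-bit shift.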
4456 // Create mask.
4457 __ movl(mask, out);
4458 __ sarl(mask, Immediate(31));
4459 // Add mask.
4460 __ addl(out, mask);
4461 __ xorl(out, mask);
4462 break;
4463 }
4464 case DataType::Type::kInt64: {
4465 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4466 CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();
4467 // Create mask.
4468 __ movq(mask, out);
4469 __ sarq(mask, Immediate(63));
4470 // Add mask.
4471 __ addq(out, mask);
4472 __ xorq(out, mask);
4473 break;
4474 }
4475 case DataType::Type::kFloat32: {
4476 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4477 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
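      // Floating-point abs just clears the IEEE-754 sign bit: AND with 0x7FFFFFFF (or
      // 0x7FFFFFFFFFFFFFFF for doubles below) keeps the exponent and mantissa intact.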
4478 __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
4479 __ andps(out, mask);
4480 break;
4481 }
4482 case DataType::Type::kFloat64: {
4483 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4484 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4485 __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
4486 __ andpd(out, mask);
4487 break;
4488 }
4489 default:
4490 LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4491 }
4492 }
4493
4494 void LocationsBuilderX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4495 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
4496 locations->SetInAt(0, Location::Any());
4497 }
4498
4499 void InstructionCodeGeneratorX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4500 SlowPathCode* slow_path =
4501 new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathX86_64(instruction);
4502 codegen_->AddSlowPath(slow_path);
4503
4504 LocationSummary* locations = instruction->GetLocations();
4505 Location value = locations->InAt(0);
4506
4507 switch (instruction->GetType()) {
4508 case DataType::Type::kBool:
4509 case DataType::Type::kUint8:
4510 case DataType::Type::kInt8:
4511 case DataType::Type::kUint16:
4512 case DataType::Type::kInt16:
4513 case DataType::Type::kInt32: {
4514 if (value.IsRegister()) {
4515 __ testl(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
4516 __ j(kEqual, slow_path->GetEntryLabel());
4517 } else if (value.IsStackSlot()) {
4518 __ cmpl(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
4519 __ j(kEqual, slow_path->GetEntryLabel());
4520 } else {
4521 DCHECK(value.IsConstant()) << value;
4522 if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
4523 __ jmp(slow_path->GetEntryLabel());
4524 }
4525 }
4526 break;
4527 }
4528 case DataType::Type::kInt64: {
4529 if (value.IsRegister()) {
4530 __ testq(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
4531 __ j(kEqual, slow_path->GetEntryLabel());
4532 } else if (value.IsDoubleStackSlot()) {
4533 __ cmpq(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
4534 __ j(kEqual, slow_path->GetEntryLabel());
4535 } else {
4536 DCHECK(value.IsConstant()) << value;
4537 if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
4538 __ jmp(slow_path->GetEntryLabel());
4539 }
4540 }
4541 break;
4542 }
4543 default:
4544 LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
4545 }
4546 }
4547
4548 void LocationsBuilderX86_64::HandleShift(HBinaryOperation* op) {
4549 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4550
4551 LocationSummary* locations =
4552 new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall);
4553
4554 switch (op->GetResultType()) {
4555 case DataType::Type::kInt32:
4556 case DataType::Type::kInt64: {
4557 locations->SetInAt(0, Location::RequiresRegister());
4558 // The shift count needs to be in CL.
4559 locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, op->InputAt(1)));
4560 locations->SetOut(Location::SameAsFirstInput());
4561 break;
4562 }
4563 default:
4564 LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
4565 }
4566 }
4567
4568 void InstructionCodeGeneratorX86_64::HandleShift(HBinaryOperation* op) {
4569 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4570
4571 LocationSummary* locations = op->GetLocations();
4572 CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
4573 Location second = locations->InAt(1);
4574
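  // Constant shift counts are masked to the operand width (0-31 for int, 0-63 for long), matching
  // Java shift semantics; for register counts the shift instructions apply the same masking to CL
  // in hardware.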
4575 switch (op->GetResultType()) {
4576 case DataType::Type::kInt32: {
4577 if (second.IsRegister()) {
4578 CpuRegister second_reg = second.AsRegister<CpuRegister>();
4579 if (op->IsShl()) {
4580 __ shll(first_reg, second_reg);
4581 } else if (op->IsShr()) {
4582 __ sarl(first_reg, second_reg);
4583 } else {
4584 __ shrl(first_reg, second_reg);
4585 }
4586 } else {
4587 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
4588 if (op->IsShl()) {
4589 __ shll(first_reg, imm);
4590 } else if (op->IsShr()) {
4591 __ sarl(first_reg, imm);
4592 } else {
4593 __ shrl(first_reg, imm);
4594 }
4595 }
4596 break;
4597 }
4598 case DataType::Type::kInt64: {
4599 if (second.IsRegister()) {
4600 CpuRegister second_reg = second.AsRegister<CpuRegister>();
4601 if (op->IsShl()) {
4602 __ shlq(first_reg, second_reg);
4603 } else if (op->IsShr()) {
4604 __ sarq(first_reg, second_reg);
4605 } else {
4606 __ shrq(first_reg, second_reg);
4607 }
4608 } else {
4609 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
4610 if (op->IsShl()) {
4611 __ shlq(first_reg, imm);
4612 } else if (op->IsShr()) {
4613 __ sarq(first_reg, imm);
4614 } else {
4615 __ shrq(first_reg, imm);
4616 }
4617 }
4618 break;
4619 }
4620 default:
4621 LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
4622 UNREACHABLE();
4623 }
4624 }
4625
4626 void LocationsBuilderX86_64::VisitRor(HRor* ror) {
4627 LocationSummary* locations =
4628 new (GetGraph()->GetAllocator()) LocationSummary(ror, LocationSummary::kNoCall);
4629
4630 switch (ror->GetResultType()) {
4631 case DataType::Type::kInt32:
4632 case DataType::Type::kInt64: {
4633 locations->SetInAt(0, Location::RequiresRegister());
4634 // The shift count needs to be in CL (unless it is a constant).
4635 locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, ror->InputAt(1)));
4636 locations->SetOut(Location::SameAsFirstInput());
4637 break;
4638 }
4639 default:
4640 LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
4641 UNREACHABLE();
4642 }
4643 }
4644
4645 void InstructionCodeGeneratorX86_64::VisitRor(HRor* ror) {
4646 LocationSummary* locations = ror->GetLocations();
4647 CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
4648 Location second = locations->InAt(1);
4649
4650 switch (ror->GetResultType()) {
4651 case DataType::Type::kInt32:
4652 if (second.IsRegister()) {
4653 CpuRegister second_reg = second.AsRegister<CpuRegister>();
4654 __ rorl(first_reg, second_reg);
4655 } else {
4656 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
4657 __ rorl(first_reg, imm);
4658 }
4659 break;
4660 case DataType::Type::kInt64:
4661 if (second.IsRegister()) {
4662 CpuRegister second_reg = second.AsRegister<CpuRegister>();
4663 __ rorq(first_reg, second_reg);
4664 } else {
4665 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
4666 __ rorq(first_reg, imm);
4667 }
4668 break;
4669 default:
4670 LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
4671 UNREACHABLE();
4672 }
4673 }
4674
4675 void LocationsBuilderX86_64::VisitShl(HShl* shl) {
4676 HandleShift(shl);
4677 }
4678
4679 void InstructionCodeGeneratorX86_64::VisitShl(HShl* shl) {
4680 HandleShift(shl);
4681 }
4682
4683 void LocationsBuilderX86_64::VisitShr(HShr* shr) {
4684 HandleShift(shr);
4685 }
4686
4687 void InstructionCodeGeneratorX86_64::VisitShr(HShr* shr) {
4688 HandleShift(shr);
4689 }
4690
4691 void LocationsBuilderX86_64::VisitUShr(HUShr* ushr) {
4692 HandleShift(ushr);
4693 }
4694
4695 void InstructionCodeGeneratorX86_64::VisitUShr(HUShr* ushr) {
4696 HandleShift(ushr);
4697 }
4698
4699 void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
4700 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
4701 instruction, LocationSummary::kCallOnMainOnly);
4702 InvokeRuntimeCallingConvention calling_convention;
4703 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
4704 locations->SetOut(Location::RegisterLocation(RAX));
4705 }
4706
4707 void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) {
4708 codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
4709 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
4710 DCHECK(!codegen_->IsLeafMethod());
4711 }
4712
4713 void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) {
4714 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
4715 instruction, LocationSummary::kCallOnMainOnly);
4716 InvokeRuntimeCallingConvention calling_convention;
4717 locations->SetOut(Location::RegisterLocation(RAX));
4718 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
4719 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
4720 }
4721
4722 void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) {
4723 // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
4724 QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
4725 codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
4726 CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
4727 DCHECK(!codegen_->IsLeafMethod());
4728 }
4729
4730 void LocationsBuilderX86_64::VisitParameterValue(HParameterValue* instruction) {
4731 LocationSummary* locations =
4732 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4733 Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
4734 if (location.IsStackSlot()) {
4735 location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
4736 } else if (location.IsDoubleStackSlot()) {
4737 location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
4738 }
4739 locations->SetOut(location);
4740 }
4741
4742 void InstructionCodeGeneratorX86_64::VisitParameterValue(
4743 HParameterValue* instruction ATTRIBUTE_UNUSED) {
4744 // Nothing to do, the parameter is already at its location.
4745 }
4746
4747 void LocationsBuilderX86_64::VisitCurrentMethod(HCurrentMethod* instruction) {
4748 LocationSummary* locations =
4749 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4750 locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument));
4751 }
4752
4753 void InstructionCodeGeneratorX86_64::VisitCurrentMethod(
4754 HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
4755 // Nothing to do, the method is already at its location.
4756 }
4757
4758 void LocationsBuilderX86_64::VisitClassTableGet(HClassTableGet* instruction) {
4759 LocationSummary* locations =
4760 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4761 locations->SetInAt(0, Location::RequiresRegister());
4762 locations->SetOut(Location::RequiresRegister());
4763 }
4764
4765 void InstructionCodeGeneratorX86_64::VisitClassTableGet(HClassTableGet* instruction) {
4766 LocationSummary* locations = instruction->GetLocations();
4767 if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
4768 uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
4769 instruction->GetIndex(), kX86_64PointerSize).SizeValue();
4770 __ movq(locations->Out().AsRegister<CpuRegister>(),
4771 Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset));
4772 } else {
4773 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
4774 instruction->GetIndex(), kX86_64PointerSize));
4775 __ movq(locations->Out().AsRegister<CpuRegister>(),
4776 Address(locations->InAt(0).AsRegister<CpuRegister>(),
4777 mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
4778 __ movq(locations->Out().AsRegister<CpuRegister>(),
4779 Address(locations->Out().AsRegister<CpuRegister>(), method_offset));
4780 }
4781 }
4782
4783 void LocationsBuilderX86_64::VisitNot(HNot* not_) {
4784 LocationSummary* locations =
4785 new (GetGraph()->GetAllocator()) LocationSummary(not_, LocationSummary::kNoCall);
4786 locations->SetInAt(0, Location::RequiresRegister());
4787 locations->SetOut(Location::SameAsFirstInput());
4788 }
4789
4790 void InstructionCodeGeneratorX86_64::VisitNot(HNot* not_) {
4791 LocationSummary* locations = not_->GetLocations();
4792 DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
4793 locations->Out().AsRegister<CpuRegister>().AsRegister());
4794 Location out = locations->Out();
4795 switch (not_->GetResultType()) {
4796 case DataType::Type::kInt32:
4797 __ notl(out.AsRegister<CpuRegister>());
4798 break;
4799
4800 case DataType::Type::kInt64:
4801 __ notq(out.AsRegister<CpuRegister>());
4802 break;
4803
4804 default:
4805 LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
4806 }
4807 }
4808
4809 void LocationsBuilderX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
4810 LocationSummary* locations =
4811 new (GetGraph()->GetAllocator()) LocationSummary(bool_not, LocationSummary::kNoCall);
4812 locations->SetInAt(0, Location::RequiresRegister());
4813 locations->SetOut(Location::SameAsFirstInput());
4814 }
4815
4816 void InstructionCodeGeneratorX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
4817 LocationSummary* locations = bool_not->GetLocations();
4818 DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
4819 locations->Out().AsRegister<CpuRegister>().AsRegister());
4820 Location out = locations->Out();
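  // Booleans are materialized as 0/1 in registers, so XOR-ing with 1 flips the value.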
4821 __ xorl(out.AsRegister<CpuRegister>(), Immediate(1));
4822 }
4823
4824 void LocationsBuilderX86_64::VisitPhi(HPhi* instruction) {
4825 LocationSummary* locations =
4826 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4827 for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
4828 locations->SetInAt(i, Location::Any());
4829 }
4830 locations->SetOut(Location::Any());
4831 }
4832
4833 void InstructionCodeGeneratorX86_64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
4834 LOG(FATAL) << "Unimplemented";
4835 }
4836
4837 void CodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) {
4838 /*
4839 * According to the JSR-133 Cookbook, for x86-64 only StoreLoad/AnyAny barriers need memory fence.
4840 * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86-64 memory model.
4841 * For those cases, all we need to ensure is that there is a scheduling barrier in place.
4842 */
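  // (x86-64 is TSO: loads are not reordered with older loads and stores are not reordered with
  //  older stores, so only the store->load ordering requires an explicit fence.)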
4843 switch (kind) {
4844 case MemBarrierKind::kAnyAny: {
4845 MemoryFence();
4846 break;
4847 }
4848 case MemBarrierKind::kAnyStore:
4849 case MemBarrierKind::kLoadAny:
4850 case MemBarrierKind::kStoreStore: {
4851 // nop
4852 break;
4853 }
4854 case MemBarrierKind::kNTStoreStore:
4855 // Non-Temporal Store/Store needs an explicit fence.
4856 MemoryFence(/* non-temporal= */ true);
4857 break;
4858 }
4859 }
4860
4861 void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) {
4862 DCHECK(instruction->IsInstanceFieldGet() ||
4863 instruction->IsStaticFieldGet() ||
4864 instruction->IsPredicatedInstanceFieldGet());
4865
4866 bool object_field_get_with_read_barrier =
4867 kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
4868 bool is_predicated = instruction->IsPredicatedInstanceFieldGet();
4869 LocationSummary* locations =
4870 new (GetGraph()->GetAllocator()) LocationSummary(instruction,
4871 object_field_get_with_read_barrier
4872 ? LocationSummary::kCallOnSlowPath
4873 : LocationSummary::kNoCall);
4874 if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
4875 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
4876 }
4877 // receiver_input
4878 locations->SetInAt(is_predicated ? 1 : 0, Location::RequiresRegister());
4879 if (is_predicated) {
4880 if (DataType::IsFloatingPointType(instruction->GetType())) {
4881 locations->SetInAt(0, Location::RequiresFpuRegister());
4882 } else {
4883 locations->SetInAt(0, Location::RequiresRegister());
4884 }
4885 }
4886 if (DataType::IsFloatingPointType(instruction->GetType())) {
4887 locations->SetOut(is_predicated ? Location::SameAsFirstInput()
4888 : Location::RequiresFpuRegister());
4889 } else {
4890 // The output overlaps for an object field get when read barriers are
4891 // enabled: we do not want the move to overwrite the object's location, as
4892     // we need it to emit the read barrier. For predicated instructions we can always
4893     // overlap, since the output is SameAsFirstInput and already holds the default value.
4894 locations->SetOut(is_predicated ? Location::SameAsFirstInput() : Location::RequiresRegister(),
4895 object_field_get_with_read_barrier || is_predicated
4896 ? Location::kOutputOverlap
4897 : Location::kNoOutputOverlap);
4898 }
4899 }
4900
4901 void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
4902 const FieldInfo& field_info) {
4903 DCHECK(instruction->IsInstanceFieldGet() ||
4904 instruction->IsStaticFieldGet() ||
4905 instruction->IsPredicatedInstanceFieldGet());
4906
4907 LocationSummary* locations = instruction->GetLocations();
4908 Location base_loc = locations->InAt(instruction->IsPredicatedInstanceFieldGet() ? 1 : 0);
4909 CpuRegister base = base_loc.AsRegister<CpuRegister>();
4910 Location out = locations->Out();
4911 bool is_volatile = field_info.IsVolatile();
4912 DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
4913 DataType::Type load_type = instruction->GetType();
4914 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
4915
4916 switch (load_type) {
4917 case DataType::Type::kBool:
4918 case DataType::Type::kUint8: {
4919 __ movzxb(out.AsRegister<CpuRegister>(), Address(base, offset));
4920 break;
4921 }
4922
4923 case DataType::Type::kInt8: {
4924 __ movsxb(out.AsRegister<CpuRegister>(), Address(base, offset));
4925 break;
4926 }
4927
4928 case DataType::Type::kUint16: {
4929 __ movzxw(out.AsRegister<CpuRegister>(), Address(base, offset));
4930 break;
4931 }
4932
4933 case DataType::Type::kInt16: {
4934 __ movsxw(out.AsRegister<CpuRegister>(), Address(base, offset));
4935 break;
4936 }
4937
4938 case DataType::Type::kInt32: {
4939 __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
4940 break;
4941 }
4942
4943 case DataType::Type::kReference: {
4944 // /* HeapReference<Object> */ out = *(base + offset)
4945 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
4946 // Note that a potential implicit null check is handled in this
4947 // CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call.
4948 codegen_->GenerateFieldLoadWithBakerReadBarrier(
4949 instruction, out, base, offset, /* needs_null_check= */ true);
4950 if (is_volatile) {
4951 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4952 }
4953 } else {
4954 __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
4955 codegen_->MaybeRecordImplicitNullCheck(instruction);
4956 if (is_volatile) {
4957 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4958 }
4959 // If read barriers are enabled, emit read barriers other than
4960 // Baker's using a slow path (and also unpoison the loaded
4961 // reference, if heap poisoning is enabled).
4962 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
4963 }
4964 break;
4965 }
4966
4967 case DataType::Type::kInt64: {
4968 __ movq(out.AsRegister<CpuRegister>(), Address(base, offset));
4969 break;
4970 }
4971
4972 case DataType::Type::kFloat32: {
4973 __ movss(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
4974 break;
4975 }
4976
4977 case DataType::Type::kFloat64: {
4978 __ movsd(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
4979 break;
4980 }
4981
4982 case DataType::Type::kUint32:
4983 case DataType::Type::kUint64:
4984 case DataType::Type::kVoid:
4985 LOG(FATAL) << "Unreachable type " << load_type;
4986 UNREACHABLE();
4987 }
4988
4989 if (load_type == DataType::Type::kReference) {
4990 // Potential implicit null checks, in the case of reference
4991 // fields, are handled in the previous switch statement.
4992 } else {
4993 codegen_->MaybeRecordImplicitNullCheck(instruction);
4994 }
4995
4996 if (is_volatile) {
4997 if (load_type == DataType::Type::kReference) {
4998 // Memory barriers, in the case of references, are also handled
4999 // in the previous switch statement.
5000 } else {
5001 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5002 }
5003 }
5004 }
5005
5006 void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction,
5007 const FieldInfo& field_info) {
5008 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
5009
5010 LocationSummary* locations =
5011 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5012 DataType::Type field_type = field_info.GetFieldType();
5013 bool is_volatile = field_info.IsVolatile();
5014 bool needs_write_barrier =
5015 CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
5016
5017 locations->SetInAt(0, Location::RequiresRegister());
5018 if (DataType::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
5019 if (is_volatile) {
5020 // In order to satisfy the semantics of volatile, this must be a single instruction store.
5021 locations->SetInAt(1, Location::FpuRegisterOrInt32Constant(instruction->InputAt(1)));
5022 } else {
5023 locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
5024 }
5025 } else {
5026 if (is_volatile) {
5027 // In order to satisfy the semantics of volatile, this must be a single instruction store.
5028 locations->SetInAt(1, Location::RegisterOrInt32Constant(instruction->InputAt(1)));
5029 } else {
5030 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5031 }
5032 }
5033 if (needs_write_barrier) {
5034 // Temporary registers for the write barrier.
5035 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
5036 locations->AddTemp(Location::RequiresRegister());
5037 } else if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
5038 // Temporary register for the reference poisoning.
5039 locations->AddTemp(Location::RequiresRegister());
5040 }
5041 }
5042
5043 void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
5044 const FieldInfo& field_info,
5045 bool value_can_be_null) {
5046 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
5047
5048 LocationSummary* locations = instruction->GetLocations();
5049 CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>();
5050 Location value = locations->InAt(1);
5051 bool is_volatile = field_info.IsVolatile();
5052 DataType::Type field_type = field_info.GetFieldType();
5053 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
5054 bool is_predicated =
5055 instruction->IsInstanceFieldSet() && instruction->AsInstanceFieldSet()->GetIsPredicatedSet();
5056
5057 if (is_volatile) {
5058 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
5059 }
5060
5061 bool maybe_record_implicit_null_check_done = false;
5062
5063 NearLabel pred_is_null;
5064 if (is_predicated) {
5065 __ testl(base, base);
5066 __ j(kZero, &pred_is_null);
5067 }
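  // For predicated sets the store only happens when the target object is non-null; a null
  // target simply skips ahead to `pred_is_null` at the end of this method.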
5068
5069 switch (field_type) {
5070 case DataType::Type::kBool:
5071 case DataType::Type::kUint8:
5072 case DataType::Type::kInt8: {
5073 if (value.IsConstant()) {
5074 __ movb(Address(base, offset),
5075 Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
5076 } else {
5077 __ movb(Address(base, offset), value.AsRegister<CpuRegister>());
5078 }
5079 break;
5080 }
5081
5082 case DataType::Type::kUint16:
5083 case DataType::Type::kInt16: {
5084 if (value.IsConstant()) {
5085 __ movw(Address(base, offset),
5086 Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
5087 } else {
5088 __ movw(Address(base, offset), value.AsRegister<CpuRegister>());
5089 }
5090 break;
5091 }
5092
5093 case DataType::Type::kInt32:
5094 case DataType::Type::kReference: {
5095 if (value.IsConstant()) {
5096 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
5097 // `field_type == DataType::Type::kReference` implies `v == 0`.
5098 DCHECK((field_type != DataType::Type::kReference) || (v == 0));
5099 // Note: if heap poisoning is enabled, no need to poison
5100 // (negate) `v` if it is a reference, as it would be null.
5101 __ movl(Address(base, offset), Immediate(v));
5102 } else {
5103 if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
5104 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
5105 __ movl(temp, value.AsRegister<CpuRegister>());
5106 __ PoisonHeapReference(temp);
5107 __ movl(Address(base, offset), temp);
5108 } else {
5109 __ movl(Address(base, offset), value.AsRegister<CpuRegister>());
5110 }
5111 }
5112 break;
5113 }
5114
5115 case DataType::Type::kInt64: {
5116 if (value.IsConstant()) {
5117 int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
5118 codegen_->MoveInt64ToAddress(Address(base, offset),
5119 Address(base, offset + sizeof(int32_t)),
5120 v,
5121 instruction);
5122 maybe_record_implicit_null_check_done = true;
5123 } else {
5124 __ movq(Address(base, offset), value.AsRegister<CpuRegister>());
5125 }
5126 break;
5127 }
5128
5129 case DataType::Type::kFloat32: {
5130 if (value.IsConstant()) {
5131 int32_t v =
5132 bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
5133 __ movl(Address(base, offset), Immediate(v));
5134 } else {
5135 __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>());
5136 }
5137 break;
5138 }
5139
5140 case DataType::Type::kFloat64: {
5141 if (value.IsConstant()) {
5142 int64_t v =
5143 bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
5144 codegen_->MoveInt64ToAddress(Address(base, offset),
5145 Address(base, offset + sizeof(int32_t)),
5146 v,
5147 instruction);
5148 maybe_record_implicit_null_check_done = true;
5149 } else {
5150 __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>());
5151 }
5152 break;
5153 }
5154
5155 case DataType::Type::kUint32:
5156 case DataType::Type::kUint64:
5157 case DataType::Type::kVoid:
5158 LOG(FATAL) << "Unreachable type " << field_type;
5159 UNREACHABLE();
5160 }
5161
5162 if (!maybe_record_implicit_null_check_done) {
5163 codegen_->MaybeRecordImplicitNullCheck(instruction);
5164 }
5165
5166 if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
5167 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
5168 CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
5169 codegen_->MarkGCCard(temp, card, base, value.AsRegister<CpuRegister>(), value_can_be_null);
5170 }
5171
5172 if (is_volatile) {
5173 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
5174 }
5175
5176 if (is_predicated) {
5177 __ Bind(&pred_is_null);
5178 }
5179 }
5180
5181 void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
5182 HandleFieldSet(instruction, instruction->GetFieldInfo());
5183 }
5184
5185 void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
5186 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
5187 }
5188
5189 void LocationsBuilderX86_64::VisitPredicatedInstanceFieldGet(
5190 HPredicatedInstanceFieldGet* instruction) {
5191 HandleFieldGet(instruction);
5192 }
5193
5194 void LocationsBuilderX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
5195 HandleFieldGet(instruction);
5196 }
5197
5198 void InstructionCodeGeneratorX86_64::VisitPredicatedInstanceFieldGet(
5199 HPredicatedInstanceFieldGet* instruction) {
5200 NearLabel finish;
5201 LocationSummary* locations = instruction->GetLocations();
5202 CpuRegister target = locations->InAt(1).AsRegister<CpuRegister>();
5203 __ testl(target, target);
5204 __ j(kZero, &finish);
5205 HandleFieldGet(instruction, instruction->GetFieldInfo());
5206 __ Bind(&finish);
5207 }
5208
5209 void InstructionCodeGeneratorX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
5210 HandleFieldGet(instruction, instruction->GetFieldInfo());
5211 }
5212
5213 void LocationsBuilderX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5214 HandleFieldGet(instruction);
5215 }
5216
5217 void InstructionCodeGeneratorX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5218 HandleFieldGet(instruction, instruction->GetFieldInfo());
5219 }
5220
5221 void LocationsBuilderX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5222 HandleFieldSet(instruction, instruction->GetFieldInfo());
5223 }
5224
5225 void InstructionCodeGeneratorX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5226 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
5227 }
5228
5229 void LocationsBuilderX86_64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
5230 codegen_->CreateStringBuilderAppendLocations(instruction, Location::RegisterLocation(RAX));
5231 }
5232
5233 void InstructionCodeGeneratorX86_64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
5234 __ movl(CpuRegister(RDI), Immediate(instruction->GetFormat()->GetValue()));
5235 codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc());
5236 }
5237
5238 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldGet(
5239 HUnresolvedInstanceFieldGet* instruction) {
5240 FieldAccessCallingConventionX86_64 calling_convention;
5241 codegen_->CreateUnresolvedFieldLocationSummary(
5242 instruction, instruction->GetFieldType(), calling_convention);
5243 }
5244
5245 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldGet(
5246 HUnresolvedInstanceFieldGet* instruction) {
5247 FieldAccessCallingConventionX86_64 calling_convention;
5248 codegen_->GenerateUnresolvedFieldAccess(instruction,
5249 instruction->GetFieldType(),
5250 instruction->GetFieldIndex(),
5251 instruction->GetDexPc(),
5252 calling_convention);
5253 }
5254
5255 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldSet(
5256 HUnresolvedInstanceFieldSet* instruction) {
5257 FieldAccessCallingConventionX86_64 calling_convention;
5258 codegen_->CreateUnresolvedFieldLocationSummary(
5259 instruction, instruction->GetFieldType(), calling_convention);
5260 }
5261
5262 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldSet(
5263 HUnresolvedInstanceFieldSet* instruction) {
5264 FieldAccessCallingConventionX86_64 calling_convention;
5265 codegen_->GenerateUnresolvedFieldAccess(instruction,
5266 instruction->GetFieldType(),
5267 instruction->GetFieldIndex(),
5268 instruction->GetDexPc(),
5269 calling_convention);
5270 }
5271
5272 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldGet(
5273 HUnresolvedStaticFieldGet* instruction) {
5274 FieldAccessCallingConventionX86_64 calling_convention;
5275 codegen_->CreateUnresolvedFieldLocationSummary(
5276 instruction, instruction->GetFieldType(), calling_convention);
5277 }
5278
5279 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldGet(
5280 HUnresolvedStaticFieldGet* instruction) {
5281 FieldAccessCallingConventionX86_64 calling_convention;
5282 codegen_->GenerateUnresolvedFieldAccess(instruction,
5283 instruction->GetFieldType(),
5284 instruction->GetFieldIndex(),
5285 instruction->GetDexPc(),
5286 calling_convention);
5287 }
5288
5289 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldSet(
5290 HUnresolvedStaticFieldSet* instruction) {
5291 FieldAccessCallingConventionX86_64 calling_convention;
5292 codegen_->CreateUnresolvedFieldLocationSummary(
5293 instruction, instruction->GetFieldType(), calling_convention);
5294 }
5295
5296 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldSet(
5297 HUnresolvedStaticFieldSet* instruction) {
5298 FieldAccessCallingConventionX86_64 calling_convention;
5299 codegen_->GenerateUnresolvedFieldAccess(instruction,
5300 instruction->GetFieldType(),
5301 instruction->GetFieldIndex(),
5302 instruction->GetDexPc(),
5303 calling_convention);
5304 }
5305
5306 void LocationsBuilderX86_64::VisitNullCheck(HNullCheck* instruction) {
5307 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
5308 Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks()
5309 ? Location::RequiresRegister()
5310 : Location::Any();
5311 locations->SetInAt(0, loc);
5312 }
5313
5314 void CodeGeneratorX86_64::GenerateImplicitNullCheck(HNullCheck* instruction) {
5315 if (CanMoveNullCheckToUser(instruction)) {
5316 return;
5317 }
5318 LocationSummary* locations = instruction->GetLocations();
5319 Location obj = locations->InAt(0);
5320
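  // A load from [obj + 0] faults if `obj` is null; the runtime's fault handler turns the
  // resulting SIGSEGV at the PC recorded below into a NullPointerException. testl only reads
  // its operands, so RAX (an arbitrary choice) is not clobbered.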
5321 __ testl(CpuRegister(RAX), Address(obj.AsRegister<CpuRegister>(), 0));
5322 RecordPcInfo(instruction, instruction->GetDexPc());
5323 }
5324
5325 void CodeGeneratorX86_64::GenerateExplicitNullCheck(HNullCheck* instruction) {
5326 SlowPathCode* slow_path = new (GetScopedAllocator()) NullCheckSlowPathX86_64(instruction);
5327 AddSlowPath(slow_path);
5328
5329 LocationSummary* locations = instruction->GetLocations();
5330 Location obj = locations->InAt(0);
5331
5332 if (obj.IsRegister()) {
5333 __ testl(obj.AsRegister<CpuRegister>(), obj.AsRegister<CpuRegister>());
5334 } else if (obj.IsStackSlot()) {
5335 __ cmpl(Address(CpuRegister(RSP), obj.GetStackIndex()), Immediate(0));
5336 } else {
5337 DCHECK(obj.IsConstant()) << obj;
5338 DCHECK(obj.GetConstant()->IsNullConstant());
5339 __ jmp(slow_path->GetEntryLabel());
5340 return;
5341 }
5342 __ j(kEqual, slow_path->GetEntryLabel());
5343 }
5344
5345 void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) {
5346 codegen_->GenerateNullCheck(instruction);
5347 }
5348
5349 void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) {
5350 bool object_array_get_with_read_barrier =
5351 kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
5352 LocationSummary* locations =
5353 new (GetGraph()->GetAllocator()) LocationSummary(instruction,
5354 object_array_get_with_read_barrier
5355 ? LocationSummary::kCallOnSlowPath
5356 : LocationSummary::kNoCall);
5357 if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
5358 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
5359 }
5360 locations->SetInAt(0, Location::RequiresRegister());
5361 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5362 if (DataType::IsFloatingPointType(instruction->GetType())) {
5363 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5364 } else {
5365 // The output overlaps for an object array get when read barriers
5366 // are enabled: we do not want the move to overwrite the array's
5367 // location, as we need it to emit the read barrier.
5368 locations->SetOut(
5369 Location::RequiresRegister(),
5370 object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
5371 }
5372 }
5373
5374 void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
5375 LocationSummary* locations = instruction->GetLocations();
5376 Location obj_loc = locations->InAt(0);
5377 CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
5378 Location index = locations->InAt(1);
5379 Location out_loc = locations->Out();
5380 uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
5381
5382 DataType::Type type = instruction->GetType();
5383 switch (type) {
5384 case DataType::Type::kBool:
5385 case DataType::Type::kUint8: {
5386 CpuRegister out = out_loc.AsRegister<CpuRegister>();
5387 __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
5388 break;
5389 }
5390
5391 case DataType::Type::kInt8: {
5392 CpuRegister out = out_loc.AsRegister<CpuRegister>();
5393 __ movsxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
5394 break;
5395 }
5396
5397 case DataType::Type::kUint16: {
5398 CpuRegister out = out_loc.AsRegister<CpuRegister>();
5399 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
5400         // Branch to the compressed or uncompressed load depending on the string's compression flag.
5401 uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
5402 NearLabel done, not_compressed;
5403 __ testb(Address(obj, count_offset), Immediate(1));
5404 codegen_->MaybeRecordImplicitNullCheck(instruction);
5405 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
5406 "Expecting 0=compressed, 1=uncompressed");
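        // mirror::String keeps the compression state in the least significant bit of the count
        // field, so testing bit 0 distinguishes 8-bit (compressed) from 16-bit (uncompressed) chars.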
5407         __ j(kNotZero, &not_compressed);
5408 __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
5409 __ jmp(&done);
5410         __ Bind(&not_compressed);
5411 __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
5412 __ Bind(&done);
5413 } else {
5414 __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
5415 }
5416 break;
5417 }
5418
5419 case DataType::Type::kInt16: {
5420 CpuRegister out = out_loc.AsRegister<CpuRegister>();
5421 __ movsxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
5422 break;
5423 }
5424
5425 case DataType::Type::kInt32: {
5426 CpuRegister out = out_loc.AsRegister<CpuRegister>();
5427 __ movl(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
5428 break;
5429 }
5430
5431 case DataType::Type::kReference: {
5432 static_assert(
5433 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
5434 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
5435 // /* HeapReference<Object> */ out =
5436 // *(obj + data_offset + index * sizeof(HeapReference<Object>))
5437 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
5438 // Note that a potential implicit null check is handled in this
5439 // CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call.
5440 codegen_->GenerateArrayLoadWithBakerReadBarrier(
5441 instruction, out_loc, obj, data_offset, index, /* needs_null_check= */ true);
5442 } else {
5443 CpuRegister out = out_loc.AsRegister<CpuRegister>();
5444 __ movl(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
5445 codegen_->MaybeRecordImplicitNullCheck(instruction);
5446 // If read barriers are enabled, emit read barriers other than
5447 // Baker's using a slow path (and also unpoison the loaded
5448 // reference, if heap poisoning is enabled).
5449 if (index.IsConstant()) {
5450 uint32_t offset =
5451 (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
5452 codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
5453 } else {
5454 codegen_->MaybeGenerateReadBarrierSlow(
5455 instruction, out_loc, out_loc, obj_loc, data_offset, index);
5456 }
5457 }
5458 break;
5459 }
5460
5461 case DataType::Type::kInt64: {
5462 CpuRegister out = out_loc.AsRegister<CpuRegister>();
5463 __ movq(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_8, data_offset));
5464 break;
5465 }
5466
5467 case DataType::Type::kFloat32: {
5468 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
5469 __ movss(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
5470 break;
5471 }
5472
5473 case DataType::Type::kFloat64: {
5474 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
5475 __ movsd(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_8, data_offset));
5476 break;
5477 }
5478
5479 case DataType::Type::kUint32:
5480 case DataType::Type::kUint64:
5481 case DataType::Type::kVoid:
5482 LOG(FATAL) << "Unreachable type " << type;
5483 UNREACHABLE();
5484 }
5485
5486 if (type == DataType::Type::kReference) {
5487 // Potential implicit null checks, in the case of reference
5488 // arrays, are handled in the previous switch statement.
5489 } else {
5490 codegen_->MaybeRecordImplicitNullCheck(instruction);
5491 }
5492 }
5493
5494 void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) {
5495 DataType::Type value_type = instruction->GetComponentType();
5496
5497 bool needs_write_barrier =
5498 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
5499 bool needs_type_check = instruction->NeedsTypeCheck();
5500
5501 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5502 instruction,
5503 needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
5504
5505 locations->SetInAt(0, Location::RequiresRegister());
5506 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5507 if (DataType::IsFloatingPointType(value_type)) {
5508 locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
5509 } else {
5510 locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
5511 }
5512
5513 if (needs_write_barrier) {
5514 // Temporary registers for the write barrier.
5515 locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too.
5516 locations->AddTemp(Location::RequiresRegister());
5517 }
5518 }
5519
5520 void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
5521 LocationSummary* locations = instruction->GetLocations();
5522 Location array_loc = locations->InAt(0);
5523 CpuRegister array = array_loc.AsRegister<CpuRegister>();
5524 Location index = locations->InAt(1);
5525 Location value = locations->InAt(2);
5526 DataType::Type value_type = instruction->GetComponentType();
5527 bool needs_type_check = instruction->NeedsTypeCheck();
5528 bool needs_write_barrier =
5529 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
5530
5531 switch (value_type) {
5532 case DataType::Type::kBool:
5533 case DataType::Type::kUint8:
5534 case DataType::Type::kInt8: {
5535 uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
5536 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_1, offset);
5537 if (value.IsRegister()) {
5538 __ movb(address, value.AsRegister<CpuRegister>());
5539 } else {
5540 __ movb(address, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
5541 }
5542 codegen_->MaybeRecordImplicitNullCheck(instruction);
5543 break;
5544 }
5545
5546 case DataType::Type::kUint16:
5547 case DataType::Type::kInt16: {
5548 uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
5549 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_2, offset);
5550 if (value.IsRegister()) {
5551 __ movw(address, value.AsRegister<CpuRegister>());
5552 } else {
5553 DCHECK(value.IsConstant()) << value;
5554 __ movw(address, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
5555 }
5556 codegen_->MaybeRecordImplicitNullCheck(instruction);
5557 break;
5558 }
5559
5560 case DataType::Type::kReference: {
5561 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
5562 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
5563
5564 if (!value.IsRegister()) {
5565 // Just setting null.
5566 DCHECK(instruction->InputAt(2)->IsNullConstant());
5567 DCHECK(value.IsConstant()) << value;
5568 __ movl(address, Immediate(0));
5569 codegen_->MaybeRecordImplicitNullCheck(instruction);
5570 DCHECK(!needs_write_barrier);
5571 DCHECK(!needs_type_check);
5572 break;
5573 }
5574
5575 DCHECK(needs_write_barrier);
5576 CpuRegister register_value = value.AsRegister<CpuRegister>();
5577 Location temp_loc = locations->GetTemp(0);
5578 CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
5579
5580 bool can_value_be_null = instruction->GetValueCanBeNull();
5581 NearLabel do_store;
5582 if (can_value_be_null) {
5583 __ testl(register_value, register_value);
5584 __ j(kEqual, &do_store);
5585 }
5586
5587 SlowPathCode* slow_path = nullptr;
5588 if (needs_type_check) {
5589 slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathX86_64(instruction);
5590 codegen_->AddSlowPath(slow_path);
5591
5592 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
5593 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
5594 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
5595
5596 // Note that when Baker read barriers are enabled, the type
5597 // checks are performed without read barriers. This is fine,
5598 // even in the case where a class object is in the from-space
5599 // after the flip, as a comparison involving such a type would
5600 // not produce a false positive; it may of course produce a
5601 // false negative, in which case we would take the ArraySet
5602 // slow path.
5603
5604 // /* HeapReference<Class> */ temp = array->klass_
5605 __ movl(temp, Address(array, class_offset));
5606 codegen_->MaybeRecordImplicitNullCheck(instruction);
5607 __ MaybeUnpoisonHeapReference(temp);
5608
5609 // /* HeapReference<Class> */ temp = temp->component_type_
5610 __ movl(temp, Address(temp, component_offset));
5611 // If heap poisoning is enabled, no need to unpoison `temp`
5612 // nor the object reference in `register_value->klass`, as
5613 // we are comparing two poisoned references.
5614 __ cmpl(temp, Address(register_value, class_offset));
5615
5616 if (instruction->StaticTypeOfArrayIsObjectArray()) {
5617 NearLabel do_put;
5618 __ j(kEqual, &do_put);
5619 // If heap poisoning is enabled, the `temp` reference has
5620 // not been unpoisoned yet; unpoison it now.
5621 __ MaybeUnpoisonHeapReference(temp);
5622
5623 // If heap poisoning is enabled, no need to unpoison the
5624 // heap reference loaded below, as it is only used for a
5625 // comparison with null.
5626 __ cmpl(Address(temp, super_offset), Immediate(0));
5627 __ j(kNotEqual, slow_path->GetEntryLabel());
5628 __ Bind(&do_put);
5629 } else {
5630 __ j(kNotEqual, slow_path->GetEntryLabel());
5631 }
5632 }
5633
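      // The value is known to be non-null here: the null case branched directly to
      // `do_store` above, skipping the card mark, so MarkGCCard can skip its own null check.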
5634 CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
5635 codegen_->MarkGCCard(
5636 temp, card, array, value.AsRegister<CpuRegister>(), /* value_can_be_null= */ false);
5637
5638 if (can_value_be_null) {
5639 DCHECK(do_store.IsLinked());
5640 __ Bind(&do_store);
5641 }
5642
5643 Location source = value;
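      // Poison a copy in `temp` rather than poisoning `register_value` in place:
      // `register_value` holds an instruction input that must not be clobbered.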
5644 if (kPoisonHeapReferences) {
5645 __ movl(temp, register_value);
5646 __ PoisonHeapReference(temp);
5647 source = temp_loc;
5648 }
5649
5650 __ movl(address, source.AsRegister<CpuRegister>());
5651
5652 if (can_value_be_null || !needs_type_check) {
5653 codegen_->MaybeRecordImplicitNullCheck(instruction);
5654 }
5655
5656 if (slow_path != nullptr) {
5657 __ Bind(slow_path->GetExitLabel());
5658 }
5659
5660 break;
5661 }
5662
5663 case DataType::Type::kInt32: {
5664 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
5665 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
5666 if (value.IsRegister()) {
5667 __ movl(address, value.AsRegister<CpuRegister>());
5668 } else {
5669 DCHECK(value.IsConstant()) << value;
5670 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
5671 __ movl(address, Immediate(v));
5672 }
5673 codegen_->MaybeRecordImplicitNullCheck(instruction);
5674 break;
5675 }
5676
5677 case DataType::Type::kInt64: {
5678 uint32_t offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
5679 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset);
5680 if (value.IsRegister()) {
5681 __ movq(address, value.AsRegister<CpuRegister>());
5682 codegen_->MaybeRecordImplicitNullCheck(instruction);
5683 } else {
5684 int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
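        // `address_high` covers the upper 32 bits in case the 64-bit constant cannot be
        // stored with a single instruction.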
5685 Address address_high =
5686 CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t));
5687 codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
5688 }
5689 break;
5690 }
5691
5692 case DataType::Type::kFloat32: {
5693 uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
5694 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
5695 if (value.IsFpuRegister()) {
5696 __ movss(address, value.AsFpuRegister<XmmRegister>());
5697 } else {
5698 DCHECK(value.IsConstant());
5699 int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
5700 __ movl(address, Immediate(v));
5701 }
5702 codegen_->MaybeRecordImplicitNullCheck(instruction);
5703 break;
5704 }
5705
5706 case DataType::Type::kFloat64: {
5707 uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
5708 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset);
5709 if (value.IsFpuRegister()) {
5710 __ movsd(address, value.AsFpuRegister<XmmRegister>());
5711 codegen_->MaybeRecordImplicitNullCheck(instruction);
5712 } else {
5713 int64_t v =
5714 bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
5715 Address address_high =
5716 CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t));
5717 codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
5718 }
5719 break;
5720 }
5721
5722 case DataType::Type::kUint32:
5723 case DataType::Type::kUint64:
5724 case DataType::Type::kVoid:
5725 LOG(FATAL) << "Unreachable type " << instruction->GetType();
5726 UNREACHABLE();
5727 }
5728 }
5729
5730 void LocationsBuilderX86_64::VisitArrayLength(HArrayLength* instruction) {
5731 LocationSummary* locations =
5732 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5733 locations->SetInAt(0, Location::RequiresRegister());
5734 if (!instruction->IsEmittedAtUseSite()) {
5735 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5736 }
5737 }
5738
5739 void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction) {
5740 if (instruction->IsEmittedAtUseSite()) {
5741 return;
5742 }
5743
5744 LocationSummary* locations = instruction->GetLocations();
5745 uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
5746 CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
5747 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
5748 __ movl(out, Address(obj, offset));
5749 codegen_->MaybeRecordImplicitNullCheck(instruction);
5750 // Shift out the low bit, which holds the string compression flag, in case the array is
5750 // String's compressed array of char.
5751 if (mirror::kUseStringCompression && instruction->IsStringLength()) {
5752 __ shrl(out, Immediate(1));
5753 }
5754 }
5755
5756 void LocationsBuilderX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
5757 RegisterSet caller_saves = RegisterSet::Empty();
5758 InvokeRuntimeCallingConvention calling_convention;
5759 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
5760 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
5761 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
5762 locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
5763 HInstruction* length = instruction->InputAt(1);
5764 if (!length->IsEmittedAtUseSite()) {
5765 locations->SetInAt(1, Location::RegisterOrConstant(length));
5766 }
5767 }
5768
5769 void InstructionCodeGeneratorX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
5770 LocationSummary* locations = instruction->GetLocations();
5771 Location index_loc = locations->InAt(0);
5772 Location length_loc = locations->InAt(1);
5773 SlowPathCode* slow_path =
5774 new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathX86_64(instruction);
5775
5776 if (length_loc.IsConstant()) {
5777 int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
5778 if (index_loc.IsConstant()) {
5779 // BCE will remove the bounds check if we are guaranteed to pass.
5780 int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
5781 if (index < 0 || index >= length) {
5782 codegen_->AddSlowPath(slow_path);
5783 __ jmp(slow_path->GetEntryLabel());
5784 } else {
5785 // Some optimization after BCE may have generated this, and we should not
5786 // generate a bounds check if it is a valid range.
5787 }
5788 return;
5789 }
5790
5791 // We have to reverse the jump condition because the length is the constant.
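    // The unsigned comparison below also rejects a negative index, which wraps to a large
    // unsigned value.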
5792 CpuRegister index_reg = index_loc.AsRegister<CpuRegister>();
5793 __ cmpl(index_reg, Immediate(length));
5794 codegen_->AddSlowPath(slow_path);
5795 __ j(kAboveEqual, slow_path->GetEntryLabel());
5796 } else {
5797 HInstruction* array_length = instruction->InputAt(1);
5798 if (array_length->IsEmittedAtUseSite()) {
5799 // Address the length field in the array.
5800 DCHECK(array_length->IsArrayLength());
5801 uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
5802 Location array_loc = array_length->GetLocations()->InAt(0);
5803 Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
5804 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
5805 // TODO: if index_loc.IsConstant(), compare twice the index (to compensate for
5806 // the string compression flag) with the in-memory length and avoid the temporary.
5807 CpuRegister length_reg = CpuRegister(TMP);
5808 __ movl(length_reg, array_len);
5809 codegen_->MaybeRecordImplicitNullCheck(array_length);
5810 __ shrl(length_reg, Immediate(1));
5811 codegen_->GenerateIntCompare(length_reg, index_loc);
5812 } else {
5813 // Checking the bound for the general case:
5814 // an array of char, or String's array when the compression feature is off.
5815 if (index_loc.IsConstant()) {
5816 int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
5817 __ cmpl(array_len, Immediate(value));
5818 } else {
5819 __ cmpl(array_len, index_loc.AsRegister<CpuRegister>());
5820 }
5821 codegen_->MaybeRecordImplicitNullCheck(array_length);
5822 }
5823 } else {
5824 codegen_->GenerateIntCompare(length_loc, index_loc);
5825 }
5826 codegen_->AddSlowPath(slow_path);
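    // Unsigned comparison: `length <= index` also covers a negative index (seen as a large
    // unsigned value).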
5827 __ j(kBelowEqual, slow_path->GetEntryLabel());
5828 }
5829 }
5830
5831 void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp,
5832 CpuRegister card,
5833 CpuRegister object,
5834 CpuRegister value,
5835 bool value_can_be_null) {
5836 NearLabel is_null;
5837 if (value_can_be_null) {
5838 __ testl(value, value);
5839 __ j(kEqual, &is_null);
5840 }
5841 // Load the address of the card table into `card`.
5842 __ gs()->movq(card, Address::Absolute(Thread::CardTableOffset<kX86_64PointerSize>().Int32Value(),
5843 /* no_rip= */ true));
5844 // Calculate the offset (in the card table) of the card corresponding to
5845 // `object`.
5846 __ movq(temp, object);
5847 __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift));
5848 // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
5849 // `object`'s card.
5850 //
5851 // Register `card` contains the address of the card table. Note that the card
5852 // table's base is biased during its creation so that it always starts at an
5853 // address whose least-significant byte is equal to `kCardDirty` (see
5854 // art::gc::accounting::CardTable::Create). Therefore the MOVB instruction
5855 // below writes the `kCardDirty` (byte) value into the `object`'s card
5856 // (located at `card + object >> kCardShift`).
5857 //
5858 // This dual use of the value in register `card` (1. to calculate the location
5859 // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
5860 // (no need to explicitly load `kCardDirty` as an immediate value).
5861 __ movb(Address(temp, card, TIMES_1, 0), card);
5862 if (value_can_be_null) {
5863 __ Bind(&is_null);
5864 }
5865 }
5866
5867 void LocationsBuilderX86_64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
5868 LOG(FATAL) << "Unimplemented";
5869 }
5870
5871 void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instruction) {
5872 if (instruction->GetNext()->IsSuspendCheck() &&
5873 instruction->GetBlock()->GetLoopInformation() != nullptr) {
5874 HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
5875 // The back edge will generate the suspend check.
5876 codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
5877 }
5878
5879 codegen_->GetMoveResolver()->EmitNativeCode(instruction);
5880 }
5881
5882 void LocationsBuilderX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
5883 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5884 instruction, LocationSummary::kCallOnSlowPath);
5885 // In the suspend check slow path, there are usually no caller-save registers at all.
5886 // If SIMD instructions are present, however, we force spilling all live SIMD
5887 // registers in full width (since the runtime only saves/restores lower part).
5888 locations->SetCustomSlowPathCallerSaves(
5889 GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
5890 }
5891
5892 void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
5893 HBasicBlock* block = instruction->GetBlock();
5894 if (block->GetLoopInformation() != nullptr) {
5895 DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
5896 // The back edge will generate the suspend check.
5897 return;
5898 }
5899 if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
5900 // The goto will generate the suspend check.
5901 return;
5902 }
5903 GenerateSuspendCheck(instruction, nullptr);
5904 }
5905
5906 void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruction,
5907 HBasicBlock* successor) {
5908 SuspendCheckSlowPathX86_64* slow_path =
5909 down_cast<SuspendCheckSlowPathX86_64*>(instruction->GetSlowPath());
5910 if (slow_path == nullptr) {
5911 slow_path =
5912 new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathX86_64(instruction, successor);
5913 instruction->SetSlowPath(slow_path);
5914 codegen_->AddSlowPath(slow_path);
5915 if (successor != nullptr) {
5916 DCHECK(successor->IsLoopHeader());
5917 }
5918 } else {
5919 DCHECK_EQ(slow_path->GetSuccessor(), successor);
5920 }
5921
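  // Non-zero thread flags indicate a pending suspend or checkpoint request, which is
  // handled in the slow path.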
5922 __ gs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64PointerSize>().Int32Value(),
5923 /* no_rip= */ true),
5924 Immediate(0));
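  // If there is no successor, the slow path returns to the code right after the check;
  // otherwise (a loop back edge) it resumes at the successor block.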
5925 if (successor == nullptr) {
5926 __ j(kNotEqual, slow_path->GetEntryLabel());
5927 __ Bind(slow_path->GetReturnLabel());
5928 } else {
5929 __ j(kEqual, codegen_->GetLabelOf(successor));
5930 __ jmp(slow_path->GetEntryLabel());
5931 }
5932 }
5933
5934 X86_64Assembler* ParallelMoveResolverX86_64::GetAssembler() const {
5935 return codegen_->GetAssembler();
5936 }
5937
5938 void ParallelMoveResolverX86_64::EmitMove(size_t index) {
5939 MoveOperands* move = moves_[index];
5940 Location source = move->GetSource();
5941 Location destination = move->GetDestination();
5942
5943 if (source.IsRegister()) {
5944 if (destination.IsRegister()) {
5945 __ movq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
5946 } else if (destination.IsStackSlot()) {
5947 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
5948 source.AsRegister<CpuRegister>());
5949 } else {
5950 DCHECK(destination.IsDoubleStackSlot());
5951 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
5952 source.AsRegister<CpuRegister>());
5953 }
5954 } else if (source.IsStackSlot()) {
5955 if (destination.IsRegister()) {
5956 __ movl(destination.AsRegister<CpuRegister>(),
5957 Address(CpuRegister(RSP), source.GetStackIndex()));
5958 } else if (destination.IsFpuRegister()) {
5959 __ movss(destination.AsFpuRegister<XmmRegister>(),
5960 Address(CpuRegister(RSP), source.GetStackIndex()));
5961 } else {
5962 DCHECK(destination.IsStackSlot());
5963 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
5964 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
5965 }
5966 } else if (source.IsDoubleStackSlot()) {
5967 if (destination.IsRegister()) {
5968 __ movq(destination.AsRegister<CpuRegister>(),
5969 Address(CpuRegister(RSP), source.GetStackIndex()));
5970 } else if (destination.IsFpuRegister()) {
5971 __ movsd(destination.AsFpuRegister<XmmRegister>(),
5972 Address(CpuRegister(RSP), source.GetStackIndex()));
5973 } else {
5974 DCHECK(destination.IsDoubleStackSlot()) << destination;
5975 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
5976 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
5977 }
5978 } else if (source.IsSIMDStackSlot()) {
5979 if (destination.IsFpuRegister()) {
5980 __ movups(destination.AsFpuRegister<XmmRegister>(),
5981 Address(CpuRegister(RSP), source.GetStackIndex()));
5982 } else {
5983 DCHECK(destination.IsSIMDStackSlot());
5984 size_t high = kX86_64WordSize;
5985 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
5986 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
5987 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex() + high));
5988 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex() + high), CpuRegister(TMP));
5989 }
5990 } else if (source.IsConstant()) {
5991 HConstant* constant = source.GetConstant();
5992 if (constant->IsIntConstant() || constant->IsNullConstant()) {
5993 int32_t value = CodeGenerator::GetInt32ValueOf(constant);
5994 if (destination.IsRegister()) {
5995 if (value == 0) {
5996 __ xorl(destination.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
5997 } else {
5998 __ movl(destination.AsRegister<CpuRegister>(), Immediate(value));
5999 }
6000 } else {
6001 DCHECK(destination.IsStackSlot()) << destination;
6002 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
6003 }
6004 } else if (constant->IsLongConstant()) {
6005 int64_t value = constant->AsLongConstant()->GetValue();
6006 if (destination.IsRegister()) {
6007 codegen_->Load64BitValue(destination.AsRegister<CpuRegister>(), value);
6008 } else {
6009 DCHECK(destination.IsDoubleStackSlot()) << destination;
6010 codegen_->Store64BitValueToStack(destination, value);
6011 }
6012 } else if (constant->IsFloatConstant()) {
6013 float fp_value = constant->AsFloatConstant()->GetValue();
6014 if (destination.IsFpuRegister()) {
6015 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
6016 codegen_->Load32BitValue(dest, fp_value);
6017 } else {
6018 DCHECK(destination.IsStackSlot()) << destination;
6019 Immediate imm(bit_cast<int32_t, float>(fp_value));
6020 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
6021 }
6022 } else {
6023 DCHECK(constant->IsDoubleConstant()) << constant->DebugName();
6024 double fp_value = constant->AsDoubleConstant()->GetValue();
6025 int64_t value = bit_cast<int64_t, double>(fp_value);
6026 if (destination.IsFpuRegister()) {
6027 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
6028 codegen_->Load64BitValue(dest, fp_value);
6029 } else {
6030 DCHECK(destination.IsDoubleStackSlot()) << destination;
6031 codegen_->Store64BitValueToStack(destination, value);
6032 }
6033 }
6034 } else if (source.IsFpuRegister()) {
6035 if (destination.IsFpuRegister()) {
6036 __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
6037 } else if (destination.IsStackSlot()) {
6038 __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
6039 source.AsFpuRegister<XmmRegister>());
6040 } else if (destination.IsDoubleStackSlot()) {
6041 __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
6042 source.AsFpuRegister<XmmRegister>());
6043 } else {
6044 DCHECK(destination.IsSIMDStackSlot());
6045 __ movups(Address(CpuRegister(RSP), destination.GetStackIndex()),
6046 source.AsFpuRegister<XmmRegister>());
6047 }
6048 }
6049 }
6050
6051 void ParallelMoveResolverX86_64::Exchange32(CpuRegister reg, int mem) {
6052 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
6053 __ movl(Address(CpuRegister(RSP), mem), reg);
6054 __ movl(reg, CpuRegister(TMP));
6055 }
6056
6057 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg1, CpuRegister reg2) {
6058 __ movq(CpuRegister(TMP), reg1);
6059 __ movq(reg1, reg2);
6060 __ movq(reg2, CpuRegister(TMP));
6061 }
6062
6063 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) {
6064 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
6065 __ movq(Address(CpuRegister(RSP), mem), reg);
6066 __ movq(reg, CpuRegister(TMP));
6067 }
6068
6069 void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) {
6070 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
6071 __ movss(Address(CpuRegister(RSP), mem), reg);
6072 __ movd(reg, CpuRegister(TMP));
6073 }
6074
6075 void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) {
6076 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
6077 __ movsd(Address(CpuRegister(RSP), mem), reg);
6078 __ movd(reg, CpuRegister(TMP));
6079 }
6080
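// Swaps a 128-bit XMM register with a SIMD stack slot by spilling the register to a
// scratch area below RSP and reusing the 64-bit memory-memory exchange.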
6081 void ParallelMoveResolverX86_64::Exchange128(XmmRegister reg, int mem) {
6082 size_t extra_slot = 2 * kX86_64WordSize;
6083 __ subq(CpuRegister(RSP), Immediate(extra_slot));
6084 __ movups(Address(CpuRegister(RSP), 0), XmmRegister(reg));
6085 ExchangeMemory64(0, mem + extra_slot, 2);
6086 __ movups(XmmRegister(reg), Address(CpuRegister(RSP), 0));
6087 __ addq(CpuRegister(RSP), Immediate(extra_slot));
6088 }
6089
6090 void ParallelMoveResolverX86_64::ExchangeMemory32(int mem1, int mem2) {
6091 ScratchRegisterScope ensure_scratch(
6092 this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
6093
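  // If the scratch register had to be spilled (pushed onto the stack), all RSP-relative
  // offsets below are off by one word; compensate with `stack_offset`.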
6094 int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
6095 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
6096 __ movl(CpuRegister(ensure_scratch.GetRegister()),
6097 Address(CpuRegister(RSP), mem2 + stack_offset));
6098 __ movl(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
6099 __ movl(Address(CpuRegister(RSP), mem1 + stack_offset),
6100 CpuRegister(ensure_scratch.GetRegister()));
6101 }
6102
6103 void ParallelMoveResolverX86_64::ExchangeMemory64(int mem1, int mem2, int num_of_qwords) {
6104 ScratchRegisterScope ensure_scratch(
6105 this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
6106
6107 int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
6108
6109 // Now that temp registers are available (possibly spilled), exchange blocks of memory.
6110 for (int i = 0; i < num_of_qwords; i++) {
6111 __ movq(CpuRegister(TMP),
6112 Address(CpuRegister(RSP), mem1 + stack_offset));
6113 __ movq(CpuRegister(ensure_scratch.GetRegister()),
6114 Address(CpuRegister(RSP), mem2 + stack_offset));
6115 __ movq(Address(CpuRegister(RSP), mem2 + stack_offset),
6116 CpuRegister(TMP));
6117 __ movq(Address(CpuRegister(RSP), mem1 + stack_offset),
6118 CpuRegister(ensure_scratch.GetRegister()));
6119 stack_offset += kX86_64WordSize;
6120 }
6121 }
6122
6123 void ParallelMoveResolverX86_64::EmitSwap(size_t index) {
6124 MoveOperands* move = moves_[index];
6125 Location source = move->GetSource();
6126 Location destination = move->GetDestination();
6127
6128 if (source.IsRegister() && destination.IsRegister()) {
6129 Exchange64(source.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
6130 } else if (source.IsRegister() && destination.IsStackSlot()) {
6131 Exchange32(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
6132 } else if (source.IsStackSlot() && destination.IsRegister()) {
6133 Exchange32(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
6134 } else if (source.IsStackSlot() && destination.IsStackSlot()) {
6135 ExchangeMemory32(destination.GetStackIndex(), source.GetStackIndex());
6136 } else if (source.IsRegister() && destination.IsDoubleStackSlot()) {
6137 Exchange64(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
6138 } else if (source.IsDoubleStackSlot() && destination.IsRegister()) {
6139 Exchange64(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
6140 } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
6141 ExchangeMemory64(destination.GetStackIndex(), source.GetStackIndex(), 1);
6142 } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
6143 __ movd(CpuRegister(TMP), source.AsFpuRegister<XmmRegister>());
6144 __ movaps(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>());
6145 __ movd(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP));
6146 } else if (source.IsFpuRegister() && destination.IsStackSlot()) {
6147 Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
6148 } else if (source.IsStackSlot() && destination.IsFpuRegister()) {
6149 Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
6150 } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) {
6151 Exchange64(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
6152 } else if (source.IsDoubleStackSlot() && destination.IsFpuRegister()) {
6153 Exchange64(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
6154 } else if (source.IsSIMDStackSlot() && destination.IsSIMDStackSlot()) {
6155 ExchangeMemory64(destination.GetStackIndex(), source.GetStackIndex(), 2);
6156 } else if (source.IsFpuRegister() && destination.IsSIMDStackSlot()) {
6157 Exchange128(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
6158 } else if (destination.IsFpuRegister() && source.IsSIMDStackSlot()) {
6159 Exchange128(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
6160 } else {
6161 LOG(FATAL) << "Unimplemented swap between " << source << " and " << destination;
6162 }
6163 }
6164
6165
6166 void ParallelMoveResolverX86_64::SpillScratch(int reg) {
6167 __ pushq(CpuRegister(reg));
6168 }
6169
6170
6171 void ParallelMoveResolverX86_64::RestoreScratch(int reg) {
6172 __ popq(CpuRegister(reg));
6173 }
6174
6175 void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck(
6176 SlowPathCode* slow_path, CpuRegister class_reg) {
6177 constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
6178 const size_t status_byte_offset =
6179 mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
6180 constexpr uint32_t shifted_visibly_initialized_value =
6181 enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte);
6182
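  // The status byte is compared unsigned: any value below the shifted kVisiblyInitialized
  // value means the class is not yet visibly initialized and must take the slow path.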
6183 __ cmpb(Address(class_reg, status_byte_offset), Immediate(shifted_visibly_initialized_value));
6184 __ j(kBelow, slow_path->GetEntryLabel());
6185 __ Bind(slow_path->GetExitLabel());
6186 }
6187
6188 void InstructionCodeGeneratorX86_64::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
6189 CpuRegister temp) {
6190 uint32_t path_to_root = check->GetBitstringPathToRoot();
6191 uint32_t mask = check->GetBitstringMask();
6192 DCHECK(IsPowerOfTwo(mask + 1));
6193 size_t mask_bits = WhichPowerOf2(mask + 1);
6194
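  // Both branches below leave the flags for the caller to branch on: the zero flag is set
  // iff the masked bits of the class's bitstring equal `path_to_root`.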
6195 if (mask_bits == 16u) {
6196 // Compare the bitstring in memory.
6197 __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root));
6198 } else {
6199 // /* uint32_t */ temp = temp->status_
6200 __ movl(temp, Address(temp, mirror::Class::StatusOffset()));
6201 // Compare the bitstring bits using SUB.
6202 __ subl(temp, Immediate(path_to_root));
6203 // Shift out bits that do not contribute to the comparison.
6204 __ shll(temp, Immediate(32u - mask_bits));
6205 }
6206 }
6207
6208 HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind(
6209 HLoadClass::LoadKind desired_class_load_kind) {
6210 switch (desired_class_load_kind) {
6211 case HLoadClass::LoadKind::kInvalid:
6212 LOG(FATAL) << "UNREACHABLE";
6213 UNREACHABLE();
6214 case HLoadClass::LoadKind::kReferrersClass:
6215 break;
6216 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
6217 case HLoadClass::LoadKind::kBootImageRelRo:
6218 case HLoadClass::LoadKind::kBssEntry:
6219 case HLoadClass::LoadKind::kBssEntryPublic:
6220 case HLoadClass::LoadKind::kBssEntryPackage:
6221 DCHECK(!GetCompilerOptions().IsJitCompiler());
6222 break;
6223 case HLoadClass::LoadKind::kJitBootImageAddress:
6224 case HLoadClass::LoadKind::kJitTableAddress:
6225 DCHECK(GetCompilerOptions().IsJitCompiler());
6226 break;
6227 case HLoadClass::LoadKind::kRuntimeCall:
6228 break;
6229 }
6230 return desired_class_load_kind;
6231 }
6232
6233 void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
6234 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
6235 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
6236 // Custom calling convention: RAX serves as both input and output.
6237 CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
6238 cls,
6239 Location::RegisterLocation(RAX),
6240 Location::RegisterLocation(RAX));
6241 return;
6242 }
6243 DCHECK_EQ(cls->NeedsAccessCheck(),
6244 load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
6245 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
6246
6247 const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
6248 LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
6249 ? LocationSummary::kCallOnSlowPath
6250 : LocationSummary::kNoCall;
6251 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind);
6252 if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
6253 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
6254 }
6255
6256 if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
6257 locations->SetInAt(0, Location::RequiresRegister());
6258 }
6259 locations->SetOut(Location::RequiresRegister());
6260 if (load_kind == HLoadClass::LoadKind::kBssEntry) {
6261 if (!kUseReadBarrier || kUseBakerReadBarrier) {
6262 // Rely on the type resolution and/or initialization to save everything.
6263 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6264 } else {
6265 // For non-Baker read barrier we have a temp-clobbering call.
6266 }
6267 }
6268 }
6269
6270 Label* CodeGeneratorX86_64::NewJitRootClassPatch(const DexFile& dex_file,
6271 dex::TypeIndex type_index,
6272 Handle<mirror::Class> handle) {
6273 ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
6274 // Add a patch entry and return the label.
6275 jit_class_patches_.emplace_back(&dex_file, type_index.index_);
6276 PatchInfo<Label>* info = &jit_class_patches_.back();
6277 return &info->label;
6278 }
6279
6280 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
6281 // move.
6282 void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
6283 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
6284 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
6285 codegen_->GenerateLoadClassRuntimeCall(cls);
6286 return;
6287 }
6288 DCHECK_EQ(cls->NeedsAccessCheck(),
6289 load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
6290 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
6291
6292 LocationSummary* locations = cls->GetLocations();
6293 Location out_loc = locations->Out();
6294 CpuRegister out = out_loc.AsRegister<CpuRegister>();
6295
6296 const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
6297 ? kWithoutReadBarrier
6298 : kCompilerReadBarrierOption;
6299 bool generate_null_check = false;
6300 switch (load_kind) {
6301 case HLoadClass::LoadKind::kReferrersClass: {
6302 DCHECK(!cls->CanCallRuntime());
6303 DCHECK(!cls->MustGenerateClinitCheck());
6304 // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
6305 CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
6306 GenerateGcRootFieldLoad(
6307 cls,
6308 out_loc,
6309 Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()),
6310 /* fixup_label= */ nullptr,
6311 read_barrier_option);
6312 break;
6313 }
6314 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
6315 DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
6316 codegen_->GetCompilerOptions().IsBootImageExtension());
6317 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
6318 __ leal(out,
6319 Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
6320 codegen_->RecordBootImageTypePatch(cls);
6321 break;
6322 case HLoadClass::LoadKind::kBootImageRelRo: {
6323 DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
6324 __ movl(out,
6325 Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
6326 codegen_->RecordBootImageRelRoPatch(CodeGenerator::GetBootImageOffset(cls));
6327 break;
6328 }
6329 case HLoadClass::LoadKind::kBssEntry:
6330 case HLoadClass::LoadKind::kBssEntryPublic:
6331 case HLoadClass::LoadKind::kBssEntryPackage: {
6332 Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
6333 /* no_rip= */ false);
6334 Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls);
6335 // /* GcRoot<mirror::Class> */ out = *address /* PC-relative */
6336 GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
6337 // No need for memory fence, thanks to the x86-64 memory model.
6338 generate_null_check = true;
6339 break;
6340 }
6341 case HLoadClass::LoadKind::kJitBootImageAddress: {
6342 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
6343 uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get());
6344 DCHECK_NE(address, 0u);
6345 __ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended.
6346 break;
6347 }
6348 case HLoadClass::LoadKind::kJitTableAddress: {
6349 Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
6350 /* no_rip= */ true);
6351 Label* fixup_label =
6352 codegen_->NewJitRootClassPatch(cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass());
6353 // /* GcRoot<mirror::Class> */ out = *address
6354 GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
6355 break;
6356 }
6357 default:
6358 LOG(FATAL) << "Unexpected load kind: " << cls->GetLoadKind();
6359 UNREACHABLE();
6360 }
6361
6362 if (generate_null_check || cls->MustGenerateClinitCheck()) {
6363 DCHECK(cls->CanCallRuntime());
6364 SlowPathCode* slow_path =
6365 new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64(cls, cls);
6366 codegen_->AddSlowPath(slow_path);
6367 if (generate_null_check) {
6368 __ testl(out, out);
6369 __ j(kEqual, slow_path->GetEntryLabel());
6370 }
6371 if (cls->MustGenerateClinitCheck()) {
6372 GenerateClassInitializationCheck(slow_path, out);
6373 } else {
6374 __ Bind(slow_path->GetExitLabel());
6375 }
6376 }
6377 }
6378
6379 void LocationsBuilderX86_64::VisitClinitCheck(HClinitCheck* check) {
6380 LocationSummary* locations =
6381 new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
6382 locations->SetInAt(0, Location::RequiresRegister());
6383 if (check->HasUses()) {
6384 locations->SetOut(Location::SameAsFirstInput());
6385 }
6386 // Rely on the type initialization to save everything we need.
6387 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6388 }
6389
6390 void LocationsBuilderX86_64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
6391 // Custom calling convention: RAX serves as both input and output.
6392 Location location = Location::RegisterLocation(RAX);
6393 CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location);
6394 }
6395
6396 void InstructionCodeGeneratorX86_64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
6397 codegen_->GenerateLoadMethodHandleRuntimeCall(load);
6398 }
6399
6400 void LocationsBuilderX86_64::VisitLoadMethodType(HLoadMethodType* load) {
6401 // Custom calling convention: RAX serves as both input and output.
6402 Location location = Location::RegisterLocation(RAX);
6403 CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location);
6404 }
6405
6406 void InstructionCodeGeneratorX86_64::VisitLoadMethodType(HLoadMethodType* load) {
6407 codegen_->GenerateLoadMethodTypeRuntimeCall(load);
6408 }
6409
6410 void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) {
6411 // We assume the class is not null.
6412 SlowPathCode* slow_path =
6413 new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64(check->GetLoadClass(), check);
6414 codegen_->AddSlowPath(slow_path);
6415 GenerateClassInitializationCheck(slow_path,
6416 check->GetLocations()->InAt(0).AsRegister<CpuRegister>());
6417 }
6418
6419 HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind(
6420 HLoadString::LoadKind desired_string_load_kind) {
6421 switch (desired_string_load_kind) {
6422 case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
6423 case HLoadString::LoadKind::kBootImageRelRo:
6424 case HLoadString::LoadKind::kBssEntry:
6425 DCHECK(!GetCompilerOptions().IsJitCompiler());
6426 break;
6427 case HLoadString::LoadKind::kJitBootImageAddress:
6428 case HLoadString::LoadKind::kJitTableAddress:
6429 DCHECK(GetCompilerOptions().IsJitCompiler());
6430 break;
6431 case HLoadString::LoadKind::kRuntimeCall:
6432 break;
6433 }
6434 return desired_string_load_kind;
6435 }
6436
6437 void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) {
6438 LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
6439 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
6440 if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) {
6441 locations->SetOut(Location::RegisterLocation(RAX));
6442 } else {
6443 locations->SetOut(Location::RequiresRegister());
6444 if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
6445 if (!kUseReadBarrier || kUseBakerReadBarrier) {
6446 // Rely on the pResolveString to save everything.
6447 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6448 } else {
6449 // For non-Baker read barrier we have a temp-clobbering call.
6450 }
6451 }
6452 }
6453 }
6454
6455 Label* CodeGeneratorX86_64::NewJitRootStringPatch(const DexFile& dex_file,
6456 dex::StringIndex string_index,
6457 Handle<mirror::String> handle) {
6458 ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
6459 // Add a patch entry and return the label.
6460 jit_string_patches_.emplace_back(&dex_file, string_index.index_);
6461 PatchInfo<Label>* info = &jit_string_patches_.back();
6462 return &info->label;
6463 }
6464
6465 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
6466 // move.
6467 void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
6468 LocationSummary* locations = load->GetLocations();
6469 Location out_loc = locations->Out();
6470 CpuRegister out = out_loc.AsRegister<CpuRegister>();
6471
6472 switch (load->GetLoadKind()) {
6473 case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
6474 DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
6475 codegen_->GetCompilerOptions().IsBootImageExtension());
6476 __ leal(out,
6477 Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
6478 codegen_->RecordBootImageStringPatch(load);
6479 return;
6480 }
6481 case HLoadString::LoadKind::kBootImageRelRo: {
6482 DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
6483 __ movl(out,
6484 Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
6485 codegen_->RecordBootImageRelRoPatch(CodeGenerator::GetBootImageOffset(load));
6486 return;
6487 }
6488 case HLoadString::LoadKind::kBssEntry: {
6489 Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
6490 /* no_rip= */ false);
6491 Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
6492 // /* GcRoot<mirror::Class> */ out = *address /* PC-relative */
6493 GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
6494 // No need for memory fence, thanks to the x86-64 memory model.
6495 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86_64(load);
6496 codegen_->AddSlowPath(slow_path);
6497 __ testl(out, out);
6498 __ j(kEqual, slow_path->GetEntryLabel());
6499 __ Bind(slow_path->GetExitLabel());
6500 return;
6501 }
6502 case HLoadString::LoadKind::kJitBootImageAddress: {
6503 uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get());
6504 DCHECK_NE(address, 0u);
6505 __ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended.
6506 return;
6507 }
6508 case HLoadString::LoadKind::kJitTableAddress: {
6509 Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
6510 /* no_rip= */ true);
6511 Label* fixup_label = codegen_->NewJitRootStringPatch(
6512 load->GetDexFile(), load->GetStringIndex(), load->GetString());
6513 // /* GcRoot<mirror::String> */ out = *address
6514 GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
6515 return;
6516 }
6517 default:
6518 break;
6519 }
6520
6521 // TODO: Re-add the compiler code to do string dex cache lookup again.
6522 // Custom calling convention: RAX serves as both input and output.
6523 __ movl(CpuRegister(RAX), Immediate(load->GetStringIndex().index_));
6524 codegen_->InvokeRuntime(kQuickResolveString,
6525 load,
6526 load->GetDexPc());
6527 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
6528 }
6529
6530 static Address GetExceptionTlsAddress() {
6531 return Address::Absolute(Thread::ExceptionOffset<kX86_64PointerSize>().Int32Value(),
6532 /* no_rip= */ true);
6533 }
6534
6535 void LocationsBuilderX86_64::VisitLoadException(HLoadException* load) {
6536 LocationSummary* locations =
6537 new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall);
6538 locations->SetOut(Location::RequiresRegister());
6539 }
6540
6541 void InstructionCodeGeneratorX86_64::VisitLoadException(HLoadException* load) {
6542 __ gs()->movl(load->GetLocations()->Out().AsRegister<CpuRegister>(), GetExceptionTlsAddress());
6543 }
6544
6545 void LocationsBuilderX86_64::VisitClearException(HClearException* clear) {
6546 new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
6547 }
6548
6549 void InstructionCodeGeneratorX86_64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
6550 __ gs()->movl(GetExceptionTlsAddress(), Immediate(0));
6551 }
6552
6553 void LocationsBuilderX86_64::VisitThrow(HThrow* instruction) {
6554 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6555 instruction, LocationSummary::kCallOnMainOnly);
6556 InvokeRuntimeCallingConvention calling_convention;
6557 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6558 }
6559
6560 void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) {
6561 codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
6562 CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
6563 }
6564
6565 // Temp is used for read barrier.
6566 static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
6567 if (kEmitCompilerReadBarrier &&
6568 !kUseBakerReadBarrier &&
6569 (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
6570 type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
6571 type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
6572 return 1;
6573 }
6574 return 0;
6575 }
6576
6577 // The interface case has 2 temps: one for holding the number of interfaces and one for
6578 // the current interface pointer; the current interface is compared in memory.
6579 // The other checks have one temp for loading the object's class.
6580 static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
6581 if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
6582 return 2;
6583 }
6584 return 1 + NumberOfInstanceOfTemps(type_check_kind);
6585 }
6586
6587 void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
6588 LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
6589 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6590 bool baker_read_barrier_slow_path = false;
6591 switch (type_check_kind) {
6592 case TypeCheckKind::kExactCheck:
6593 case TypeCheckKind::kAbstractClassCheck:
6594 case TypeCheckKind::kClassHierarchyCheck:
6595 case TypeCheckKind::kArrayObjectCheck: {
6596 bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction);
6597 call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
6598 baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier;
6599 break;
6600 }
6601 case TypeCheckKind::kArrayCheck:
6602 case TypeCheckKind::kUnresolvedCheck:
6603 case TypeCheckKind::kInterfaceCheck:
6604 call_kind = LocationSummary::kCallOnSlowPath;
6605 break;
6606 case TypeCheckKind::kBitstringCheck:
6607 break;
6608 }
6609
6610 LocationSummary* locations =
6611 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
6612 if (baker_read_barrier_slow_path) {
6613 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
6614 }
6615 locations->SetInAt(0, Location::RequiresRegister());
6616 if (type_check_kind == TypeCheckKind::kBitstringCheck) {
6617 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
6618 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
6619 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
6620 } else {
6621 locations->SetInAt(1, Location::Any());
6622 }
6623 // Note that TypeCheckSlowPathX86_64 uses this "out" register too.
6624 locations->SetOut(Location::RequiresRegister());
6625 locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
6626 }
6627
6628 void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
6629 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6630 LocationSummary* locations = instruction->GetLocations();
6631 Location obj_loc = locations->InAt(0);
6632 CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
6633 Location cls = locations->InAt(1);
6634 Location out_loc = locations->Out();
6635 CpuRegister out = out_loc.AsRegister<CpuRegister>();
6636 const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
6637 DCHECK_LE(num_temps, 1u);
6638 Location maybe_temp_loc = (num_temps >= 1u) ? locations->GetTemp(0) : Location::NoLocation();
6639 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
6640 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
6641 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
6642 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
6643 SlowPathCode* slow_path = nullptr;
6644 NearLabel done, zero;
6645
6646 // Return 0 if `obj` is null.
6647 // Avoid null check if we know obj is not null.
6648 if (instruction->MustDoNullCheck()) {
6649 __ testl(obj, obj);
6650 __ j(kEqual, &zero);
6651 }
6652
6653 switch (type_check_kind) {
6654 case TypeCheckKind::kExactCheck: {
6655 ReadBarrierOption read_barrier_option =
6656 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
6657 // /* HeapReference<Class> */ out = obj->klass_
6658 GenerateReferenceLoadTwoRegisters(instruction,
6659 out_loc,
6660 obj_loc,
6661 class_offset,
6662 read_barrier_option);
6663 if (cls.IsRegister()) {
6664 __ cmpl(out, cls.AsRegister<CpuRegister>());
6665 } else {
6666 DCHECK(cls.IsStackSlot()) << cls;
6667 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
6668 }
6669 if (zero.IsLinked()) {
6670 // Classes must be equal for the instanceof to succeed.
6671 __ j(kNotEqual, &zero);
6672 __ movl(out, Immediate(1));
6673 __ jmp(&done);
6674 } else {
6675 __ setcc(kEqual, out);
6676 // setcc only sets the low byte.
6677 __ andl(out, Immediate(1));
6678 }
6679 break;
6680 }
6681
6682 case TypeCheckKind::kAbstractClassCheck: {
6683 ReadBarrierOption read_barrier_option =
6684 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
6685 // /* HeapReference<Class> */ out = obj->klass_
6686 GenerateReferenceLoadTwoRegisters(instruction,
6687 out_loc,
6688 obj_loc,
6689 class_offset,
6690 read_barrier_option);
6691 // If the class is abstract, we eagerly fetch the super class of the
6692 // object to avoid doing a comparison we know will fail.
6693 NearLabel loop, success;
6694 __ Bind(&loop);
6695 // /* HeapReference<Class> */ out = out->super_class_
6696 GenerateReferenceLoadOneRegister(instruction,
6697 out_loc,
6698 super_offset,
6699 maybe_temp_loc,
6700 read_barrier_option);
6701 __ testl(out, out);
6702 // If `out` is null, we use it for the result, and jump to `done`.
6703 __ j(kEqual, &done);
6704 if (cls.IsRegister()) {
6705 __ cmpl(out, cls.AsRegister<CpuRegister>());
6706 } else {
6707 DCHECK(cls.IsStackSlot()) << cls;
6708 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
6709 }
6710 __ j(kNotEqual, &loop);
6711 __ movl(out, Immediate(1));
6712 if (zero.IsLinked()) {
6713 __ jmp(&done);
6714 }
6715 break;
6716 }
6717
6718 case TypeCheckKind::kClassHierarchyCheck: {
6719 ReadBarrierOption read_barrier_option =
6720 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
6721 // /* HeapReference<Class> */ out = obj->klass_
6722 GenerateReferenceLoadTwoRegisters(instruction,
6723 out_loc,
6724 obj_loc,
6725 class_offset,
6726 read_barrier_option);
6727 // Walk over the class hierarchy to find a match.
6728 NearLabel loop, success;
6729 __ Bind(&loop);
6730 if (cls.IsRegister()) {
6731 __ cmpl(out, cls.AsRegister<CpuRegister>());
6732 } else {
6733 DCHECK(cls.IsStackSlot()) << cls;
6734 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
6735 }
6736 __ j(kEqual, &success);
6737 // /* HeapReference<Class> */ out = out->super_class_
6738 GenerateReferenceLoadOneRegister(instruction,
6739 out_loc,
6740 super_offset,
6741 maybe_temp_loc,
6742 read_barrier_option);
6743 __ testl(out, out);
6744 __ j(kNotEqual, &loop);
6745 // If `out` is null, we use it for the result, and jump to `done`.
6746 __ jmp(&done);
6747 __ Bind(&success);
6748 __ movl(out, Immediate(1));
6749 if (zero.IsLinked()) {
6750 __ jmp(&done);
6751 }
6752 break;
6753 }
6754
6755 case TypeCheckKind::kArrayObjectCheck: {
6756 ReadBarrierOption read_barrier_option =
6757 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
6758 // /* HeapReference<Class> */ out = obj->klass_
6759 GenerateReferenceLoadTwoRegisters(instruction,
6760 out_loc,
6761 obj_loc,
6762 class_offset,
6763 read_barrier_option);
6764 // Do an exact check.
6765 NearLabel exact_check;
6766 if (cls.IsRegister()) {
6767 __ cmpl(out, cls.AsRegister<CpuRegister>());
6768 } else {
6769 DCHECK(cls.IsStackSlot()) << cls;
6770 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
6771 }
6772 __ j(kEqual, &exact_check);
6773 // Otherwise, we need to check that the object's class is a non-primitive array.
6774 // /* HeapReference<Class> */ out = out->component_type_
6775 GenerateReferenceLoadOneRegister(instruction,
6776 out_loc,
6777 component_offset,
6778 maybe_temp_loc,
6779 read_barrier_option);
6780 __ testl(out, out);
6781 // If `out` is null, we use it for the result, and jump to `done`.
6782 __ j(kEqual, &done);
6783 __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot));
6784 __ j(kNotEqual, &zero);
6785 __ Bind(&exact_check);
6786 __ movl(out, Immediate(1));
6787 __ jmp(&done);
6788 break;
6789 }
6790
6791 case TypeCheckKind::kArrayCheck: {
6792 // No read barrier since the slow path will retry upon failure.
6793 // /* HeapReference<Class> */ out = obj->klass_
6794 GenerateReferenceLoadTwoRegisters(instruction,
6795 out_loc,
6796 obj_loc,
6797 class_offset,
6798 kWithoutReadBarrier);
6799 if (cls.IsRegister()) {
6800 __ cmpl(out, cls.AsRegister<CpuRegister>());
6801 } else {
6802 DCHECK(cls.IsStackSlot()) << cls;
6803 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
6804 }
6805 DCHECK(locations->OnlyCallsOnSlowPath());
6806 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
6807 instruction, /* is_fatal= */ false);
6808 codegen_->AddSlowPath(slow_path);
6809 __ j(kNotEqual, slow_path->GetEntryLabel());
6810 __ movl(out, Immediate(1));
6811 if (zero.IsLinked()) {
6812 __ jmp(&done);
6813 }
6814 break;
6815 }
6816
6817 case TypeCheckKind::kUnresolvedCheck:
6818 case TypeCheckKind::kInterfaceCheck: {
6819 // Note that we indeed only call on slow path, but we always go
6820 // into the slow path for the unresolved and interface check
6821 // cases.
6822 //
6823 // We cannot directly call the InstanceofNonTrivial runtime
6824 // entry point without resorting to a type checking slow path
6825 // here (i.e. by calling InvokeRuntime directly), as it would
6826 // require to assign fixed registers for the inputs of this
6827 // HInstanceOf instruction (following the runtime calling
6828 // convention), which might be cluttered by the potential first
6829 // read barrier emission at the beginning of this method.
6830 //
6831 // TODO: Introduce a new runtime entry point taking the object
6832 // to test (instead of its class) as argument, and let it deal
6833 // with the read barrier issues. This will let us refactor this
6834 // case of the `switch` code as it was previously (with a direct
6835 // call to the runtime not using a type checking slow path).
6836 // This should also be beneficial for the other cases above.
6837 DCHECK(locations->OnlyCallsOnSlowPath());
6838 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
6839 instruction, /* is_fatal= */ false);
6840 codegen_->AddSlowPath(slow_path);
6841 __ jmp(slow_path->GetEntryLabel());
6842 if (zero.IsLinked()) {
6843 __ jmp(&done);
6844 }
6845 break;
6846 }
6847
6848 case TypeCheckKind::kBitstringCheck: {
6849 // /* HeapReference<Class> */ temp = obj->klass_
6850 GenerateReferenceLoadTwoRegisters(instruction,
6851 out_loc,
6852 obj_loc,
6853 class_offset,
6854 kWithoutReadBarrier);
6855
6856 GenerateBitstringTypeCheckCompare(instruction, out);
6857 if (zero.IsLinked()) {
6858 __ j(kNotEqual, &zero);
6859 __ movl(out, Immediate(1));
6860 __ jmp(&done);
6861 } else {
6862 __ setcc(kEqual, out);
6863 // setcc only sets the low byte.
6864 __ andl(out, Immediate(1));
6865 }
6866 break;
6867 }
6868 }
6869
6870 if (zero.IsLinked()) {
6871 __ Bind(&zero);
6872 __ xorl(out, out);
6873 }
6874
6875 if (done.IsLinked()) {
6876 __ Bind(&done);
6877 }
6878
6879 if (slow_path != nullptr) {
6880 __ Bind(slow_path->GetExitLabel());
6881 }
6882 }
6883
6884 void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) {
6885 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6886 LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction);
6887 LocationSummary* locations =
6888 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
6889 locations->SetInAt(0, Location::RequiresRegister());
6890 if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
6891 // Require a register for the interface check since there is a loop that compares the class to
6892 // a memory address.
6893 locations->SetInAt(1, Location::RequiresRegister());
6894 } else if (type_check_kind == TypeCheckKind::kBitstringCheck) {
6895 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
6896 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
6897 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
6898 } else {
6899 locations->SetInAt(1, Location::Any());
6900 }
6901   // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathX86_64.
6902 locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
6903 }
6904
6905 void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
6906 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6907 LocationSummary* locations = instruction->GetLocations();
6908 Location obj_loc = locations->InAt(0);
6909 CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
6910 Location cls = locations->InAt(1);
6911 Location temp_loc = locations->GetTemp(0);
6912 CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
6913 const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
6914 DCHECK_GE(num_temps, 1u);
6915 DCHECK_LE(num_temps, 2u);
6916 Location maybe_temp2_loc = (num_temps >= 2u) ? locations->GetTemp(1) : Location::NoLocation();
6917 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
6918 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
6919 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
6920 const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
6921 const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
6922 const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
6923 const uint32_t object_array_data_offset =
6924 mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
6925
6926 bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction);
6927 SlowPathCode* type_check_slow_path =
6928 new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
6929 instruction, is_type_check_slow_path_fatal);
6930 codegen_->AddSlowPath(type_check_slow_path);
6931
6932
6933 NearLabel done;
6934 // Avoid null check if we know obj is not null.
6935 if (instruction->MustDoNullCheck()) {
6936 __ testl(obj, obj);
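    // A null reference trivially passes any checkcast, so jump straight to `done` without
    // performing the type check.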
6937 __ j(kEqual, &done);
6938 }
6939
6940 switch (type_check_kind) {
6941 case TypeCheckKind::kExactCheck:
6942 case TypeCheckKind::kArrayCheck: {
6943 // /* HeapReference<Class> */ temp = obj->klass_
6944 GenerateReferenceLoadTwoRegisters(instruction,
6945 temp_loc,
6946 obj_loc,
6947 class_offset,
6948 kWithoutReadBarrier);
6949 if (cls.IsRegister()) {
6950 __ cmpl(temp, cls.AsRegister<CpuRegister>());
6951 } else {
6952 DCHECK(cls.IsStackSlot()) << cls;
6953 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
6954 }
6955 // Jump to slow path for throwing the exception or doing a
6956 // more involved array check.
6957 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
6958 break;
6959 }
6960
6961 case TypeCheckKind::kAbstractClassCheck: {
6962 // /* HeapReference<Class> */ temp = obj->klass_
6963 GenerateReferenceLoadTwoRegisters(instruction,
6964 temp_loc,
6965 obj_loc,
6966 class_offset,
6967 kWithoutReadBarrier);
6968 // If the class is abstract, we eagerly fetch the super class of the
6969 // object to avoid doing a comparison we know will fail.
6970 NearLabel loop;
6971 __ Bind(&loop);
6972 // /* HeapReference<Class> */ temp = temp->super_class_
6973 GenerateReferenceLoadOneRegister(instruction,
6974 temp_loc,
6975 super_offset,
6976 maybe_temp2_loc,
6977 kWithoutReadBarrier);
6978
6979 // If the class reference currently in `temp` is null, jump to the slow path to throw the
6980 // exception.
6981 __ testl(temp, temp);
6982 // Otherwise, compare the classes.
6983 __ j(kZero, type_check_slow_path->GetEntryLabel());
6984 if (cls.IsRegister()) {
6985 __ cmpl(temp, cls.AsRegister<CpuRegister>());
6986 } else {
6987 DCHECK(cls.IsStackSlot()) << cls;
6988 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
6989 }
6990 __ j(kNotEqual, &loop);
6991 break;
6992 }
6993
6994 case TypeCheckKind::kClassHierarchyCheck: {
6995 // /* HeapReference<Class> */ temp = obj->klass_
6996 GenerateReferenceLoadTwoRegisters(instruction,
6997 temp_loc,
6998 obj_loc,
6999 class_offset,
7000 kWithoutReadBarrier);
7001 // Walk over the class hierarchy to find a match.
7002 NearLabel loop;
7003 __ Bind(&loop);
7004 if (cls.IsRegister()) {
7005 __ cmpl(temp, cls.AsRegister<CpuRegister>());
7006 } else {
7007 DCHECK(cls.IsStackSlot()) << cls;
7008 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
7009 }
7010 __ j(kEqual, &done);
7011
7012 // /* HeapReference<Class> */ temp = temp->super_class_
7013 GenerateReferenceLoadOneRegister(instruction,
7014 temp_loc,
7015 super_offset,
7016 maybe_temp2_loc,
7017 kWithoutReadBarrier);
7018
7019 // If the class reference currently in `temp` is not null, jump
7020       // back to the beginning of the loop.
7021 __ testl(temp, temp);
7022 __ j(kNotZero, &loop);
7023 // Otherwise, jump to the slow path to throw the exception.
7024 __ jmp(type_check_slow_path->GetEntryLabel());
7025 break;
7026 }
7027
7028 case TypeCheckKind::kArrayObjectCheck: {
7029 // /* HeapReference<Class> */ temp = obj->klass_
7030 GenerateReferenceLoadTwoRegisters(instruction,
7031 temp_loc,
7032 obj_loc,
7033 class_offset,
7034 kWithoutReadBarrier);
7035 // Do an exact check.
7037 if (cls.IsRegister()) {
7038 __ cmpl(temp, cls.AsRegister<CpuRegister>());
7039 } else {
7040 DCHECK(cls.IsStackSlot()) << cls;
7041 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
7042 }
7043 __ j(kEqual, &done);
7044
7045 // Otherwise, we need to check that the object's class is a non-primitive array.
7046 // /* HeapReference<Class> */ temp = temp->component_type_
7047 GenerateReferenceLoadOneRegister(instruction,
7048 temp_loc,
7049 component_offset,
7050 maybe_temp2_loc,
7051 kWithoutReadBarrier);
7052
7053       // If the component type is null, the object is not an array: jump to the slow path
7054       // to throw the exception.
7055       __ testl(temp, temp);
7056       __ j(kZero, type_check_slow_path->GetEntryLabel());
7057       // The object is an array; check that its component type is not a primitive type.
7060 __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
7061 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
7062 break;
7063 }
7064
7065 case TypeCheckKind::kUnresolvedCheck: {
7066 // We always go into the type check slow path for the unresolved case.
7067 //
7068 // We cannot directly call the CheckCast runtime entry point
7069 // without resorting to a type checking slow path here (i.e. by
7070       // calling InvokeRuntime directly), as it would require assigning
7071       // fixed registers to the inputs of this HCheckCast
7072 // instruction (following the runtime calling convention), which
7073 // might be cluttered by the potential first read barrier
7074 // emission at the beginning of this method.
7075 __ jmp(type_check_slow_path->GetEntryLabel());
7076 break;
7077 }
7078
7079 case TypeCheckKind::kInterfaceCheck: {
7080 // Fast path for the interface check. Try to avoid read barriers to improve the fast path.
7081       // We cannot get false positives by doing this.
7082 // /* HeapReference<Class> */ temp = obj->klass_
7083 GenerateReferenceLoadTwoRegisters(instruction,
7084 temp_loc,
7085 obj_loc,
7086 class_offset,
7087 kWithoutReadBarrier);
7088
7089 // /* HeapReference<Class> */ temp = temp->iftable_
7090 GenerateReferenceLoadTwoRegisters(instruction,
7091 temp_loc,
7092 temp_loc,
7093 iftable_offset,
7094 kWithoutReadBarrier);
7095 // Iftable is never null.
7096 __ movl(maybe_temp2_loc.AsRegister<CpuRegister>(), Address(temp, array_length_offset));
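      // Each IfTable entry occupies two reference slots (the interface class followed by its
      // method array), so the loop below steps the index down by two and compares only the
      // class slot; TIMES_4 scaling is used because heap references are 32-bit.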
7097 // Maybe poison the `cls` for direct comparison with memory.
7098 __ MaybePoisonHeapReference(cls.AsRegister<CpuRegister>());
7099 // Loop through the iftable and check if any class matches.
7100 NearLabel start_loop;
7101 __ Bind(&start_loop);
7102 // Need to subtract first to handle the empty array case.
7103 __ subl(maybe_temp2_loc.AsRegister<CpuRegister>(), Immediate(2));
7104 __ j(kNegative, type_check_slow_path->GetEntryLabel());
7105 // Go to next interface if the classes do not match.
7106 __ cmpl(cls.AsRegister<CpuRegister>(),
7107 CodeGeneratorX86_64::ArrayAddress(temp,
7108 maybe_temp2_loc,
7109 TIMES_4,
7110 object_array_data_offset));
7111     __ j(kNotEqual, &start_loop);  // Loop again if the classes did not match; fall through on a match.
7112 // If `cls` was poisoned above, unpoison it.
7113 __ MaybeUnpoisonHeapReference(cls.AsRegister<CpuRegister>());
7114 break;
7115 }
7116
7117 case TypeCheckKind::kBitstringCheck: {
7118 // /* HeapReference<Class> */ temp = obj->klass_
7119 GenerateReferenceLoadTwoRegisters(instruction,
7120 temp_loc,
7121 obj_loc,
7122 class_offset,
7123 kWithoutReadBarrier);
7124
7125 GenerateBitstringTypeCheckCompare(instruction, temp);
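      // Note: the bitstring check compares type check bits stored in the class (an encoding of
      // its position in the class hierarchy; see subtype_check.h) against a constant, so a
      // single comparison decides the relation. kNotEqual below therefore means the cast fails.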
7126 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
7127 break;
7128 }
7129 }
7130
7131 if (done.IsLinked()) {
7132 __ Bind(&done);
7133 }
7134
7135 __ Bind(type_check_slow_path->GetExitLabel());
7136 }
7137
7138 void LocationsBuilderX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
7139 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
7140 instruction, LocationSummary::kCallOnMainOnly);
7141 InvokeRuntimeCallingConvention calling_convention;
7142 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
7143 }
7144
7145 void InstructionCodeGeneratorX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
7146 codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
7147 instruction,
7148 instruction->GetDexPc());
7149 if (instruction->IsEnter()) {
7150 CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
7151 } else {
7152 CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
7153 }
7154 }
7155
7156 void LocationsBuilderX86_64::VisitX86AndNot(HX86AndNot* instruction) {
7157 DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
7158 DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType();
7159 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
7160 locations->SetInAt(0, Location::RequiresRegister());
7161   // ANDN has no immediate variant on x86, so the second input must also be in a register.
7162 locations->SetInAt(1, Location::RequiresRegister());
7163 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
7164 }
7165
7166 void LocationsBuilderX86_64::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) {
7167 DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
7168 DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType();
7169 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
7170 locations->SetInAt(0, Location::RequiresRegister());
7171 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
7172 }
7173
7174 void InstructionCodeGeneratorX86_64::VisitX86AndNot(HX86AndNot* instruction) {
7175 LocationSummary* locations = instruction->GetLocations();
7176 Location first = locations->InAt(0);
7177 Location second = locations->InAt(1);
7178 Location dest = locations->Out();
7179 __ andn(dest.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
7180 }
7181
7182 void InstructionCodeGeneratorX86_64::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) {
7183 LocationSummary* locations = instruction->GetLocations();
7184 Location src = locations->InAt(0);
7185 Location dest = locations->Out();
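  // kAnd maps to BLSR (dest = src & (src - 1)), which clears the lowest set bit; kXor maps to
  // BLSMSK (dest = src ^ (src - 1)), which builds a mask up to and including the lowest set bit.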
7186 switch (instruction->GetOpKind()) {
7187 case HInstruction::kAnd:
7188 __ blsr(dest.AsRegister<CpuRegister>(), src.AsRegister<CpuRegister>());
7189 break;
7190 case HInstruction::kXor:
7191 __ blsmsk(dest.AsRegister<CpuRegister>(), src.AsRegister<CpuRegister>());
7192 break;
7193 default:
7194 LOG(FATAL) << "Unreachable";
7195 }
7196 }
7197
7198 void LocationsBuilderX86_64::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
7199 void LocationsBuilderX86_64::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); }
7200 void LocationsBuilderX86_64::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); }
7201
7202 void LocationsBuilderX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
7203 LocationSummary* locations =
7204 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
7205 DCHECK(instruction->GetResultType() == DataType::Type::kInt32
7206 || instruction->GetResultType() == DataType::Type::kInt64);
7207 locations->SetInAt(0, Location::RequiresRegister());
7208 locations->SetInAt(1, Location::Any());
7209 locations->SetOut(Location::SameAsFirstInput());
7210 }
7211
7212 void InstructionCodeGeneratorX86_64::VisitAnd(HAnd* instruction) {
7213 HandleBitwiseOperation(instruction);
7214 }
7215
7216 void InstructionCodeGeneratorX86_64::VisitOr(HOr* instruction) {
7217 HandleBitwiseOperation(instruction);
7218 }
7219
7220 void InstructionCodeGeneratorX86_64::VisitXor(HXor* instruction) {
7221 HandleBitwiseOperation(instruction);
7222 }
7223
7224 void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
7225 LocationSummary* locations = instruction->GetLocations();
7226 Location first = locations->InAt(0);
7227 Location second = locations->InAt(1);
7228 DCHECK(first.Equals(locations->Out()));
7229
7230 if (instruction->GetResultType() == DataType::Type::kInt32) {
7231 if (second.IsRegister()) {
7232 if (instruction->IsAnd()) {
7233 __ andl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
7234 } else if (instruction->IsOr()) {
7235 __ orl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
7236 } else {
7237 DCHECK(instruction->IsXor());
7238 __ xorl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
7239 }
7240 } else if (second.IsConstant()) {
7241 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
7242 if (instruction->IsAnd()) {
7243 __ andl(first.AsRegister<CpuRegister>(), imm);
7244 } else if (instruction->IsOr()) {
7245 __ orl(first.AsRegister<CpuRegister>(), imm);
7246 } else {
7247 DCHECK(instruction->IsXor());
7248 __ xorl(first.AsRegister<CpuRegister>(), imm);
7249 }
7250 } else {
7251 Address address(CpuRegister(RSP), second.GetStackIndex());
7252 if (instruction->IsAnd()) {
7253 __ andl(first.AsRegister<CpuRegister>(), address);
7254 } else if (instruction->IsOr()) {
7255 __ orl(first.AsRegister<CpuRegister>(), address);
7256 } else {
7257 DCHECK(instruction->IsXor());
7258 __ xorl(first.AsRegister<CpuRegister>(), address);
7259 }
7260 }
7261 } else {
7262 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
7263 CpuRegister first_reg = first.AsRegister<CpuRegister>();
7264 bool second_is_constant = false;
7265 int64_t value = 0;
7266 if (second.IsConstant()) {
7267 second_is_constant = true;
7268 value = second.GetConstant()->AsLongConstant()->GetValue();
7269 }
7270 bool is_int32_value = IsInt<32>(value);
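    // x86-64 ALU instructions accept at most a sign-extended 32-bit immediate, so constants
    // outside that range are materialized in the RIP-addressed constant area instead.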
7271
7272 if (instruction->IsAnd()) {
7273 if (second_is_constant) {
7274 if (is_int32_value) {
7275 __ andq(first_reg, Immediate(static_cast<int32_t>(value)));
7276 } else {
7277 __ andq(first_reg, codegen_->LiteralInt64Address(value));
7278 }
7279 } else if (second.IsDoubleStackSlot()) {
7280 __ andq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
7281 } else {
7282 __ andq(first_reg, second.AsRegister<CpuRegister>());
7283 }
7284 } else if (instruction->IsOr()) {
7285 if (second_is_constant) {
7286 if (is_int32_value) {
7287 __ orq(first_reg, Immediate(static_cast<int32_t>(value)));
7288 } else {
7289 __ orq(first_reg, codegen_->LiteralInt64Address(value));
7290 }
7291 } else if (second.IsDoubleStackSlot()) {
7292 __ orq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
7293 } else {
7294 __ orq(first_reg, second.AsRegister<CpuRegister>());
7295 }
7296 } else {
7297 DCHECK(instruction->IsXor());
7298 if (second_is_constant) {
7299 if (is_int32_value) {
7300 __ xorq(first_reg, Immediate(static_cast<int32_t>(value)));
7301 } else {
7302 __ xorq(first_reg, codegen_->LiteralInt64Address(value));
7303 }
7304 } else if (second.IsDoubleStackSlot()) {
7305 __ xorq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
7306 } else {
7307 __ xorq(first_reg, second.AsRegister<CpuRegister>());
7308 }
7309 }
7310 }
7311 }
7312
7313 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(
7314 HInstruction* instruction,
7315 Location out,
7316 uint32_t offset,
7317 Location maybe_temp,
7318 ReadBarrierOption read_barrier_option) {
7319 CpuRegister out_reg = out.AsRegister<CpuRegister>();
7320 if (read_barrier_option == kWithReadBarrier) {
7321 CHECK(kEmitCompilerReadBarrier);
7322 if (kUseBakerReadBarrier) {
7323 // Load with fast path based Baker's read barrier.
7324 // /* HeapReference<Object> */ out = *(out + offset)
7325 codegen_->GenerateFieldLoadWithBakerReadBarrier(
7326 instruction, out, out_reg, offset, /* needs_null_check= */ false);
7327 } else {
7328 // Load with slow path based read barrier.
7329 // Save the value of `out` into `maybe_temp` before overwriting it
7330 // in the following move operation, as we will need it for the
7331 // read barrier below.
7332 DCHECK(maybe_temp.IsRegister()) << maybe_temp;
7333 __ movl(maybe_temp.AsRegister<CpuRegister>(), out_reg);
7334 // /* HeapReference<Object> */ out = *(out + offset)
7335 __ movl(out_reg, Address(out_reg, offset));
7336 codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
7337 }
7338 } else {
7339 // Plain load with no read barrier.
7340 // /* HeapReference<Object> */ out = *(out + offset)
7341 __ movl(out_reg, Address(out_reg, offset));
7342 __ MaybeUnpoisonHeapReference(out_reg);
7343 }
7344 }
7345
7346 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(
7347 HInstruction* instruction,
7348 Location out,
7349 Location obj,
7350 uint32_t offset,
7351 ReadBarrierOption read_barrier_option) {
7352 CpuRegister out_reg = out.AsRegister<CpuRegister>();
7353 CpuRegister obj_reg = obj.AsRegister<CpuRegister>();
7354 if (read_barrier_option == kWithReadBarrier) {
7355 CHECK(kEmitCompilerReadBarrier);
7356 if (kUseBakerReadBarrier) {
7357 // Load with fast path based Baker's read barrier.
7358 // /* HeapReference<Object> */ out = *(obj + offset)
7359 codegen_->GenerateFieldLoadWithBakerReadBarrier(
7360 instruction, out, obj_reg, offset, /* needs_null_check= */ false);
7361 } else {
7362 // Load with slow path based read barrier.
7363 // /* HeapReference<Object> */ out = *(obj + offset)
7364 __ movl(out_reg, Address(obj_reg, offset));
7365 codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
7366 }
7367 } else {
7368 // Plain load with no read barrier.
7369 // /* HeapReference<Object> */ out = *(obj + offset)
7370 __ movl(out_reg, Address(obj_reg, offset));
7371 __ MaybeUnpoisonHeapReference(out_reg);
7372 }
7373 }
7374
7375 void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(
7376 HInstruction* instruction,
7377 Location root,
7378 const Address& address,
7379 Label* fixup_label,
7380 ReadBarrierOption read_barrier_option) {
7381 CpuRegister root_reg = root.AsRegister<CpuRegister>();
7382 if (read_barrier_option == kWithReadBarrier) {
7383 DCHECK(kEmitCompilerReadBarrier);
7384 if (kUseBakerReadBarrier) {
7385 // Fast path implementation of art::ReadBarrier::BarrierForRoot when
7386       // Baker's read barriers are used:
7387 //
7388 // root = obj.field;
7389 // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
7390 // if (temp != null) {
7391 // root = temp(root)
7392 // }
7393
7394 // /* GcRoot<mirror::Object> */ root = *address
7395 __ movl(root_reg, address);
7396 if (fixup_label != nullptr) {
7397 __ Bind(fixup_label);
7398 }
7399 static_assert(
7400 sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
7401 "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
7402 "have different sizes.");
7403 static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
7404 "art::mirror::CompressedReference<mirror::Object> and int32_t "
7405 "have different sizes.");
7406
7407 // Slow path marking the GC root `root`.
7408 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathX86_64(
7409 instruction, root, /* unpoison_ref_before_marking= */ false);
7410 codegen_->AddSlowPath(slow_path);
7411
7412 // Test the `Thread::Current()->pReadBarrierMarkReg ## root.reg()` entrypoint.
7413 const int32_t entry_point_offset =
7414 Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(root.reg());
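      // Note: the mark entrypoints occupy consecutive per-register slots in the Thread object,
      // so the offset above is selected simply by the root's register number.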
7415 __ gs()->cmpl(Address::Absolute(entry_point_offset, /* no_rip= */ true), Immediate(0));
7416 // The entrypoint is null when the GC is not marking.
7417 __ j(kNotEqual, slow_path->GetEntryLabel());
7418 __ Bind(slow_path->GetExitLabel());
7419 } else {
7420 // GC root loaded through a slow path for read barriers other
7421 // than Baker's.
7422 // /* GcRoot<mirror::Object>* */ root = address
7423 __ leaq(root_reg, address);
7424 if (fixup_label != nullptr) {
7425 __ Bind(fixup_label);
7426 }
7427 // /* mirror::Object* */ root = root->Read()
7428 codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
7429 }
7430 } else {
7431 // Plain GC root load with no read barrier.
7432 // /* GcRoot<mirror::Object> */ root = *address
7433 __ movl(root_reg, address);
7434 if (fixup_label != nullptr) {
7435 __ Bind(fixup_label);
7436 }
7437 // Note that GC roots are not affected by heap poisoning, thus we
7438 // do not have to unpoison `root_reg` here.
7439 }
7440 }
7441
7442 void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
7443 Location ref,
7444 CpuRegister obj,
7445 uint32_t offset,
7446 bool needs_null_check) {
7447 DCHECK(kEmitCompilerReadBarrier);
7448 DCHECK(kUseBakerReadBarrier);
7449
7450 // /* HeapReference<Object> */ ref = *(obj + offset)
7451 Address src(obj, offset);
7452 GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
7453 }
7454
7455 void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
7456 Location ref,
7457 CpuRegister obj,
7458 uint32_t data_offset,
7459 Location index,
7460 bool needs_null_check) {
7461 DCHECK(kEmitCompilerReadBarrier);
7462 DCHECK(kUseBakerReadBarrier);
7463
7464 static_assert(
7465 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
7466 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
7467 // /* HeapReference<Object> */ ref =
7468 // *(obj + data_offset + index * sizeof(HeapReference<Object>))
7469 Address src = CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset);
7470 GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
7471 }
7472
7473 void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
7474 Location ref,
7475 CpuRegister obj,
7476 const Address& src,
7477 bool needs_null_check,
7478 bool always_update_field,
7479 CpuRegister* temp1,
7480 CpuRegister* temp2) {
7481 DCHECK(kEmitCompilerReadBarrier);
7482 DCHECK(kUseBakerReadBarrier);
7483
7484 // In slow path based read barriers, the read barrier call is
7485 // inserted after the original load. However, in fast path based
7486 // Baker's read barriers, we need to perform the load of
7487 // mirror::Object::monitor_ *before* the original reference load.
7488 // This load-load ordering is required by the read barrier.
7489 // The fast path/slow path (for Baker's algorithm) should look like:
7490 //
7491 // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
7492 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
7493 // HeapReference<Object> ref = *src; // Original reference load.
7494 // bool is_gray = (rb_state == ReadBarrier::GrayState());
7495 // if (is_gray) {
7496 // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path.
7497 // }
7498 //
7499 // Note: the original implementation in ReadBarrier::Barrier is
7500 // slightly more complex as:
7501 // - it implements the load-load fence using a data dependency on
7502 // the high-bits of rb_state, which are expected to be all zeroes
7503 // (we use CodeGeneratorX86_64::GenerateMemoryBarrier instead
7504 // here, which is a no-op thanks to the x86-64 memory model);
7505 // - it performs additional checks that we do not do here for
7506 // performance reasons.
7507
7508 CpuRegister ref_reg = ref.AsRegister<CpuRegister>();
7509 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
7510
7511 // Given the numeric representation, it's enough to check the low bit of the rb_state.
7512 static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
7513 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
7514 constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
7515 constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
7516 constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
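  // For example, if kReadBarrierStateShift is 28 (the usual lock word layout), the gray bit is
  // bit 4 of byte 3 of the 32-bit monitor word and test_value is 0x10, so the testb below only
  // needs to read a single byte.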
7517
7518 // if (rb_state == ReadBarrier::GrayState())
7519 // ref = ReadBarrier::Mark(ref);
7520 // At this point, just do the "if" and make sure that flags are preserved until the branch.
7521 __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
7522 if (needs_null_check) {
7523 MaybeRecordImplicitNullCheck(instruction);
7524 }
7525
7526 // Load fence to prevent load-load reordering.
7527 // Note that this is a no-op, thanks to the x86-64 memory model.
7528 GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
7529
7530 // The actual reference load.
7531 // /* HeapReference<Object> */ ref = *src
7532 __ movl(ref_reg, src); // Flags are unaffected.
7533
7534 // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch.
7535 // Slow path marking the object `ref` when it is gray.
7536 SlowPathCode* slow_path;
7537 if (always_update_field) {
7538 DCHECK(temp1 != nullptr);
7539 DCHECK(temp2 != nullptr);
7540 slow_path = new (GetScopedAllocator()) ReadBarrierMarkAndUpdateFieldSlowPathX86_64(
7541 instruction, ref, obj, src, /* unpoison_ref_before_marking= */ true, *temp1, *temp2);
7542 } else {
7543 slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathX86_64(
7544 instruction, ref, /* unpoison_ref_before_marking= */ true);
7545 }
7546 AddSlowPath(slow_path);
7547
7548 // We have done the "if" of the gray bit check above, now branch based on the flags.
7549 __ j(kNotZero, slow_path->GetEntryLabel());
7550
7551 // Object* ref = ref_addr->AsMirrorPtr()
7552 __ MaybeUnpoisonHeapReference(ref_reg);
7553
7554 __ Bind(slow_path->GetExitLabel());
7555 }
7556
7557 void CodeGeneratorX86_64::GenerateReadBarrierSlow(HInstruction* instruction,
7558 Location out,
7559 Location ref,
7560 Location obj,
7561 uint32_t offset,
7562 Location index) {
7563 DCHECK(kEmitCompilerReadBarrier);
7564
7565 // Insert a slow path based read barrier *after* the reference load.
7566 //
7567 // If heap poisoning is enabled, the unpoisoning of the loaded
7568 // reference will be carried out by the runtime within the slow
7569 // path.
7570 //
7571 // Note that `ref` currently does not get unpoisoned (when heap
7572 // poisoning is enabled), which is alright as the `ref` argument is
7573 // not used by the artReadBarrierSlow entry point.
7574 //
7575 // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
7576 SlowPathCode* slow_path = new (GetScopedAllocator())
7577 ReadBarrierForHeapReferenceSlowPathX86_64(instruction, out, ref, obj, offset, index);
7578 AddSlowPath(slow_path);
7579
7580 __ jmp(slow_path->GetEntryLabel());
7581 __ Bind(slow_path->GetExitLabel());
7582 }
7583
7584 void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
7585 Location out,
7586 Location ref,
7587 Location obj,
7588 uint32_t offset,
7589 Location index) {
7590 if (kEmitCompilerReadBarrier) {
7591 // Baker's read barriers shall be handled by the fast path
7592 // (CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier).
7593 DCHECK(!kUseBakerReadBarrier);
7594 // If heap poisoning is enabled, unpoisoning will be taken care of
7595 // by the runtime within the slow path.
7596 GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
7597 } else if (kPoisonHeapReferences) {
7598 __ UnpoisonHeapReference(out.AsRegister<CpuRegister>());
7599 }
7600 }
7601
7602 void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
7603 Location out,
7604 Location root) {
7605 DCHECK(kEmitCompilerReadBarrier);
7606
7607 // Insert a slow path based read barrier *after* the GC root load.
7608 //
7609 // Note that GC roots are not affected by heap poisoning, so we do
7610 // not need to do anything special for this here.
7611 SlowPathCode* slow_path =
7612 new (GetScopedAllocator()) ReadBarrierForRootSlowPathX86_64(instruction, out, root);
7613 AddSlowPath(slow_path);
7614
7615 __ jmp(slow_path->GetEntryLabel());
7616 __ Bind(slow_path->GetExitLabel());
7617 }
7618
7619 void LocationsBuilderX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
7620 // Nothing to do, this should be removed during prepare for register allocator.
7621 LOG(FATAL) << "Unreachable";
7622 }
7623
7624 void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
7625 // Nothing to do, this should be removed during prepare for register allocator.
7626 LOG(FATAL) << "Unreachable";
7627 }
7628
7629 // Simple implementation of packed switch - generate cascaded compare/jumps.
7630 void LocationsBuilderX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
7631 LocationSummary* locations =
7632 new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
7633 locations->SetInAt(0, Location::RequiresRegister());
7634 locations->AddTemp(Location::RequiresRegister());
7635 locations->AddTemp(Location::RequiresRegister());
7636 }
7637
7638 void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
7639 int32_t lower_bound = switch_instr->GetStartValue();
7640 uint32_t num_entries = switch_instr->GetNumEntries();
7641 LocationSummary* locations = switch_instr->GetLocations();
7642 CpuRegister value_reg_in = locations->InAt(0).AsRegister<CpuRegister>();
7643 CpuRegister temp_reg = locations->GetTemp(0).AsRegister<CpuRegister>();
7644 CpuRegister base_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
7645 HBasicBlock* default_block = switch_instr->GetDefaultBlock();
7646
7647 // Should we generate smaller inline compare/jumps?
7648 if (num_entries <= kPackedSwitchJumpTableThreshold) {
7649 // Figure out the correct compare values and jump conditions.
7650 // Handle the first compare/branch as a special case because it might
7651 // jump to the default case.
7652 DCHECK_GT(num_entries, 2u);
7653 Condition first_condition;
7654 uint32_t index;
7655 const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
7656 if (lower_bound != 0) {
7657 first_condition = kLess;
7658 __ cmpl(value_reg_in, Immediate(lower_bound));
7659 __ j(first_condition, codegen_->GetLabelOf(default_block));
7660 __ j(kEqual, codegen_->GetLabelOf(successors[0]));
7661
7662 index = 1;
7663 } else {
7664 // Handle all the compare/jumps below.
7665 first_condition = kBelow;
7666 index = 0;
7667 }
7668
7669 // Handle the rest of the compare/jumps.
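    // Each cmpl below settles two consecutive cases: the `first_condition` branch catches
    // values below case_value[index + 1] (which, after the earlier compares, means equality
    // with case_value[index]), and the kEqual branch catches case_value[index + 1] itself.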
7670 for (; index + 1 < num_entries; index += 2) {
7671 int32_t compare_to_value = lower_bound + index + 1;
7672 __ cmpl(value_reg_in, Immediate(compare_to_value));
7673       // Jump to successors[index] if value < case_value[index + 1].
7674 __ j(first_condition, codegen_->GetLabelOf(successors[index]));
7675 // Jump to successors[index + 1] if value == case_value[index + 1].
7676 __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
7677 }
7678
7679 if (index != num_entries) {
7680       // There is an odd number of entries. Handle the last one.
7681 DCHECK_EQ(index + 1, num_entries);
7682 __ cmpl(value_reg_in, Immediate(static_cast<int32_t>(lower_bound + index)));
7683 __ j(kEqual, codegen_->GetLabelOf(successors[index]));
7684 }
7685
7686 // And the default for any other value.
7687 if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
7688 __ jmp(codegen_->GetLabelOf(default_block));
7689 }
7690 return;
7691 }
7692
7693 // Remove the bias, if needed.
7694 Register value_reg_out = value_reg_in.AsRegister();
7695 if (lower_bound != 0) {
7696 __ leal(temp_reg, Address(value_reg_in, -lower_bound));
7697 value_reg_out = temp_reg.AsRegister();
7698 }
7699 CpuRegister value_reg(value_reg_out);
7700
7701 // Is the value in range?
7702 __ cmpl(value_reg, Immediate(num_entries - 1));
7703 __ j(kAbove, codegen_->GetLabelOf(default_block));
7704
7705 // We are in the range of the table.
7706 // Load the address of the jump table in the constant area.
7707 __ leaq(base_reg, codegen_->LiteralCaseTable(switch_instr));
7708
7709 // Load the (signed) offset from the jump table.
7710 __ movsxd(temp_reg, Address(base_reg, value_reg, TIMES_4, 0));
7711
7712 // Add the offset to the address of the table base.
7713 __ addq(temp_reg, base_reg);
7714
7715 // And jump.
7716 __ jmp(temp_reg);
7717 }
7718
7719 void LocationsBuilderX86_64::VisitIntermediateAddress(HIntermediateAddress* instruction
7720 ATTRIBUTE_UNUSED) {
7721 LOG(FATAL) << "Unreachable";
7722 }
7723
7724 void InstructionCodeGeneratorX86_64::VisitIntermediateAddress(HIntermediateAddress* instruction
7725 ATTRIBUTE_UNUSED) {
7726 LOG(FATAL) << "Unreachable";
7727 }
7728
7729 void CodeGeneratorX86_64::Load32BitValue(CpuRegister dest, int32_t value) {
7730 if (value == 0) {
7731 __ xorl(dest, dest);
7732 } else {
7733 __ movl(dest, Immediate(value));
7734 }
7735 }
7736
7737 void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) {
7738 if (value == 0) {
7739 // Clears upper bits too.
7740 __ xorl(dest, dest);
7741 } else if (IsUint<32>(value)) {
7742 // We can use a 32 bit move, as it will zero-extend and is shorter.
7743 __ movl(dest, Immediate(static_cast<int32_t>(value)));
7744 } else {
7745 __ movq(dest, Immediate(value));
7746 }
7747 }
7748
7749 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, int32_t value) {
7750 if (value == 0) {
7751 __ xorps(dest, dest);
7752 } else {
7753 __ movss(dest, LiteralInt32Address(value));
7754 }
7755 }
7756
7757 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, int64_t value) {
7758 if (value == 0) {
7759 __ xorpd(dest, dest);
7760 } else {
7761 __ movsd(dest, LiteralInt64Address(value));
7762 }
7763 }
7764
7765 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, float value) {
7766 Load32BitValue(dest, bit_cast<int32_t, float>(value));
7767 }
7768
7769 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, double value) {
7770 Load64BitValue(dest, bit_cast<int64_t, double>(value));
7771 }
7772
7773 void CodeGeneratorX86_64::Compare32BitValue(CpuRegister dest, int32_t value) {
7774 if (value == 0) {
7775 __ testl(dest, dest);
7776 } else {
7777 __ cmpl(dest, Immediate(value));
7778 }
7779 }
7780
7781 void CodeGeneratorX86_64::Compare64BitValue(CpuRegister dest, int64_t value) {
7782 if (IsInt<32>(value)) {
7783 if (value == 0) {
7784 __ testq(dest, dest);
7785 } else {
7786 __ cmpq(dest, Immediate(static_cast<int32_t>(value)));
7787 }
7788 } else {
7789     // The value does not fit in a sign-extended 32-bit immediate; compare against the constant area.
7790 __ cmpq(dest, LiteralInt64Address(value));
7791 }
7792 }
7793
7794 void CodeGeneratorX86_64::GenerateIntCompare(Location lhs, Location rhs) {
7795 CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>();
7796 GenerateIntCompare(lhs_reg, rhs);
7797 }
7798
7799 void CodeGeneratorX86_64::GenerateIntCompare(CpuRegister lhs, Location rhs) {
7800 if (rhs.IsConstant()) {
7801 int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
7802 Compare32BitValue(lhs, value);
7803 } else if (rhs.IsStackSlot()) {
7804 __ cmpl(lhs, Address(CpuRegister(RSP), rhs.GetStackIndex()));
7805 } else {
7806 __ cmpl(lhs, rhs.AsRegister<CpuRegister>());
7807 }
7808 }
7809
7810 void CodeGeneratorX86_64::GenerateLongCompare(Location lhs, Location rhs) {
7811 CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>();
7812 if (rhs.IsConstant()) {
7813 int64_t value = rhs.GetConstant()->AsLongConstant()->GetValue();
7814 Compare64BitValue(lhs_reg, value);
7815 } else if (rhs.IsDoubleStackSlot()) {
7816 __ cmpq(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
7817 } else {
7818 __ cmpq(lhs_reg, rhs.AsRegister<CpuRegister>());
7819 }
7820 }
7821
7822 Address CodeGeneratorX86_64::ArrayAddress(CpuRegister obj,
7823 Location index,
7824 ScaleFactor scale,
7825 uint32_t data_offset) {
7826 return index.IsConstant() ?
7827 Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) :
7828 Address(obj, index.AsRegister<CpuRegister>(), scale, data_offset);
7829 }
7830
7831 void CodeGeneratorX86_64::Store64BitValueToStack(Location dest, int64_t value) {
7832 DCHECK(dest.IsDoubleStackSlot());
7833 if (IsInt<32>(value)) {
7834 // Can move directly as an int32 constant.
7835 __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()),
7836 Immediate(static_cast<int32_t>(value)));
7837 } else {
7838 Load64BitValue(CpuRegister(TMP), value);
7839 __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()), CpuRegister(TMP));
7840 }
7841 }
7842
7843 /**
7844  * Class to handle late fixup of offsets into the constant area.
7845 */
7846 class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
7847 public:
7848   RIPFixup(CodeGeneratorX86_64& codegen, size_t offset)
7849 : codegen_(&codegen), offset_into_constant_area_(offset) {}
7850
7851 protected:
7852   void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }
7853
7854 CodeGeneratorX86_64* codegen_;
7855
7856 private:
7857   void Process(const MemoryRegion& region, int pos) override {
7858 // Patch the correct offset for the instruction. We use the address of the
7859 // 'next' instruction, which is 'pos' (patch the 4 bytes before).
7860 int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
7861 int32_t relative_position = constant_offset - pos;
7862
7863 // Patch in the right value.
7864 region.StoreUnaligned<int32_t>(pos - 4, relative_position);
7865 }
7866
7867 // Location in constant area that the fixup refers to.
7868 size_t offset_into_constant_area_;
7869 };
7870
7871 /**
7872  * Class to handle late fixup of offsets to a jump table that will be created in the
7873 * constant area.
7874 */
7875 class JumpTableRIPFixup : public RIPFixup {
7876 public:
7877   JumpTableRIPFixup(CodeGeneratorX86_64& codegen, HPackedSwitch* switch_instr)
7878 : RIPFixup(codegen, -1), switch_instr_(switch_instr) {}
7879
7880   void CreateJumpTable() {
7881 X86_64Assembler* assembler = codegen_->GetAssembler();
7882
7883 // Ensure that the reference to the jump table has the correct offset.
7884 const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
7885 SetOffset(offset_in_constant_table);
7886
7887 // Compute the offset from the start of the function to this jump table.
7888 const int32_t current_table_offset = assembler->CodeSize() + offset_in_constant_table;
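    // Each table entry emitted below is a signed 32-bit offset from the start of the table to
    // its target block; VisitPackedSwitch reads an entry back with movsxd and adds the table
    // base address to form the jump target.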
7889
7890     // Populate the jump table with the correct offsets for each successor.
7891 int32_t num_entries = switch_instr_->GetNumEntries();
7892 HBasicBlock* block = switch_instr_->GetBlock();
7893 const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
7894 // The value that we want is the target offset - the position of the table.
7895 for (int32_t i = 0; i < num_entries; i++) {
7896 HBasicBlock* b = successors[i];
7897 Label* l = codegen_->GetLabelOf(b);
7898 DCHECK(l->IsBound());
7899 int32_t offset_to_block = l->Position() - current_table_offset;
7900 assembler->AppendInt32(offset_to_block);
7901 }
7902 }
7903
7904 private:
7905 const HPackedSwitch* switch_instr_;
7906 };
7907
7908 void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) {
7909 // Generate the constant area if needed.
7910 X86_64Assembler* assembler = GetAssembler();
7911 if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
7912 // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8 byte values.
7913 assembler->Align(4, 0);
7914 constant_area_start_ = assembler->CodeSize();
7915
7916 // Populate any jump tables.
7917 for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) {
7918 jump_table->CreateJumpTable();
7919 }
7920
7921 // And now add the constant area to the generated code.
7922 assembler->AddConstantArea();
7923 }
7924
7925 // And finish up.
7926 CodeGenerator::Finalize(allocator);
7927 }
7928
7929 Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) {
7930 AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddDouble(v));
7931 return Address::RIP(fixup);
7932 }
7933
7934 Address CodeGeneratorX86_64::LiteralFloatAddress(float v) {
7935 AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddFloat(v));
7936 return Address::RIP(fixup);
7937 }
7938
7939 Address CodeGeneratorX86_64::LiteralInt32Address(int32_t v) {
7940 AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddInt32(v));
7941 return Address::RIP(fixup);
7942 }
7943
7944 Address CodeGeneratorX86_64::LiteralInt64Address(int64_t v) {
7945 AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddInt64(v));
7946 return Address::RIP(fixup);
7947 }
7948
7949 // TODO: trg as memory.
7950 void CodeGeneratorX86_64::MoveFromReturnRegister(Location trg, DataType::Type type) {
7951 if (!trg.IsValid()) {
7952 DCHECK_EQ(type, DataType::Type::kVoid);
7953 return;
7954 }
7955
7956 DCHECK_NE(type, DataType::Type::kVoid);
7957
7958 Location return_loc = InvokeDexCallingConventionVisitorX86_64().GetReturnLocation(type);
7959 if (trg.Equals(return_loc)) {
7960 return;
7961 }
7962
7963 // Let the parallel move resolver take care of all of this.
7964 HParallelMove parallel_move(GetGraph()->GetAllocator());
7965 parallel_move.AddMove(return_loc, trg, type, nullptr);
7966   GetMoveResolver()->EmitNativeCode(&parallel_move);
7967 }
7968
7969 Address CodeGeneratorX86_64::LiteralCaseTable(HPackedSwitch* switch_instr) {
7970 // Create a fixup to be used to create and address the jump table.
7971 JumpTableRIPFixup* table_fixup =
7972 new (GetGraph()->GetAllocator()) JumpTableRIPFixup(*this, switch_instr);
7973
7974 // We have to populate the jump tables.
7975 fixups_to_jump_tables_.push_back(table_fixup);
7976 return Address::RIP(table_fixup);
7977 }
7978
7979 void CodeGeneratorX86_64::MoveInt64ToAddress(const Address& addr_low,
7980 const Address& addr_high,
7981 int64_t v,
7982 HInstruction* instruction) {
7983 if (IsInt<32>(v)) {
7984 int32_t v_32 = v;
7985 __ movq(addr_low, Immediate(v_32));
7986 MaybeRecordImplicitNullCheck(instruction);
7987 } else {
7988     // The value does not fit in a sign-extended 32-bit immediate; store it in two 32-bit halves.
7989 int32_t low_v = Low32Bits(v);
7990 int32_t high_v = High32Bits(v);
7991 __ movl(addr_low, Immediate(low_v));
7992 MaybeRecordImplicitNullCheck(instruction);
7993 __ movl(addr_high, Immediate(high_v));
7994 }
7995 }
7996
7997 void CodeGeneratorX86_64::PatchJitRootUse(uint8_t* code,
7998 const uint8_t* roots_data,
7999 const PatchInfo<Label>& info,
8000 uint64_t index_in_table) const {
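  // The label is bound just past the instruction's 32-bit immediate, so backing up by
  // kLabelPositionToLiteralOffsetAdjustment addresses the immediate itself; it is overwritten
  // with the address of the root's entry in the JIT roots table (checked to fit in 32 bits).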
8001 uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
8002 uintptr_t address =
8003 reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
8004 using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t;
8005 reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
8006 dchecked_integral_cast<uint32_t>(address);
8007 }
8008
8009 void CodeGeneratorX86_64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
8010 for (const PatchInfo<Label>& info : jit_string_patches_) {
8011 StringReference string_reference(info.target_dex_file, dex::StringIndex(info.offset_or_index));
8012 uint64_t index_in_table = GetJitStringRootIndex(string_reference);
8013 PatchJitRootUse(code, roots_data, info, index_in_table);
8014 }
8015
8016 for (const PatchInfo<Label>& info : jit_class_patches_) {
8017 TypeReference type_reference(info.target_dex_file, dex::TypeIndex(info.offset_or_index));
8018 uint64_t index_in_table = GetJitClassRootIndex(type_reference);
8019 PatchJitRootUse(code, roots_data, info, index_in_table);
8020 }
8021 }
8022
8023 bool LocationsBuilderX86_64::CpuHasAvxFeatureFlag() {
8024 return codegen_->GetInstructionSetFeatures().HasAVX();
8025 }
8026
8027 bool LocationsBuilderX86_64::CpuHasAvx2FeatureFlag() {
8028 return codegen_->GetInstructionSetFeatures().HasAVX2();
8029 }
8030
8031 bool InstructionCodeGeneratorX86_64::CpuHasAvxFeatureFlag() {
8032 return codegen_->GetInstructionSetFeatures().HasAVX();
8033 }
8034
8035 bool InstructionCodeGeneratorX86_64::CpuHasAvx2FeatureFlag() {
8036 return codegen_->GetInstructionSetFeatures().HasAVX2();
8037 }
8038
8039 #undef __
8040
8041 } // namespace x86_64
8042 } // namespace art
8043