1 /*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "code_generator_x86_64.h"
18
19 #include "arch/x86_64/jni_frame_x86_64.h"
20 #include "art_method-inl.h"
21 #include "class_root-inl.h"
22 #include "class_table.h"
23 #include "code_generator_utils.h"
24 #include "entrypoints/quick/quick_entrypoints.h"
25 #include "gc/accounting/card_table.h"
26 #include "gc/space/image_space.h"
27 #include "heap_poisoning.h"
28 #include "interpreter/mterp/nterp.h"
29 #include "intrinsics.h"
30 #include "intrinsics_list.h"
31 #include "intrinsics_utils.h"
32 #include "intrinsics_x86_64.h"
33 #include "jit/profiling_info.h"
34 #include "linker/linker_patch.h"
35 #include "lock_word.h"
36 #include "mirror/array-inl.h"
37 #include "mirror/class-inl.h"
38 #include "mirror/object_reference.h"
39 #include "mirror/var_handle.h"
40 #include "optimizing/nodes.h"
41 #include "profiling_info_builder.h"
42 #include "scoped_thread_state_change-inl.h"
43 #include "thread.h"
44 #include "trace.h"
45 #include "utils/assembler.h"
46 #include "utils/stack_checks.h"
47 #include "utils/x86_64/assembler_x86_64.h"
48 #include "utils/x86_64/constants_x86_64.h"
49 #include "utils/x86_64/managed_register_x86_64.h"
50
51 namespace art HIDDEN {
52
53 template<class MirrorType>
54 class GcRoot;
55
56 namespace x86_64 {
57
58 static constexpr int kCurrentMethodStackOffset = 0;
59 static constexpr Register kMethodRegisterArgument = RDI;
60 // The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump
61 * table version generates 7 instructions and num_entries literals. The compare/jump sequence
62 * generates less code/data with a small num_entries.
63 static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;
64
65 static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 };
66 static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 };
67
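// Mask for the C2 condition flag (bit 10) of the x87 FPU status word.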
68 static constexpr int kC2ConditionMask = 0x400;
69
70 static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
71 // Custom calling convention: RAX serves as both input and output.
72 RegisterSet caller_saves = RegisterSet::Empty();
73 caller_saves.Add(Location::RegisterLocation(RAX));
74 return caller_saves;
75 }
76
77 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
78 #define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())-> // NOLINT
79 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize, x).Int32Value()
80
81 class NullCheckSlowPathX86_64 : public SlowPathCode {
82 public:
83 explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : SlowPathCode(instruction) {}
84
85 void EmitNativeCode(CodeGenerator* codegen) override {
86 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
87 __ Bind(GetEntryLabel());
88 if (instruction_->CanThrowIntoCatchBlock()) {
89 // Live registers will be restored in the catch block if caught.
90 SaveLiveRegisters(codegen, instruction_->GetLocations());
91 }
92 x86_64_codegen->InvokeRuntime(kQuickThrowNullPointer,
93 instruction_,
94 instruction_->GetDexPc(),
95 this);
96 CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
97 }
98
99 bool IsFatal() const override { return true; }
100
101 const char* GetDescription() const override { return "NullCheckSlowPathX86_64"; }
102
103 private:
104 DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86_64);
105 };
106
107 class DivZeroCheckSlowPathX86_64 : public SlowPathCode {
108 public:
109 explicit DivZeroCheckSlowPathX86_64(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}
110
111 void EmitNativeCode(CodeGenerator* codegen) override {
112 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
113 __ Bind(GetEntryLabel());
114 x86_64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
115 CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
116 }
117
118 bool IsFatal() const override { return true; }
119
120 const char* GetDescription() const override { return "DivZeroCheckSlowPathX86_64"; }
121
122 private:
123 DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86_64);
124 };
125
126 class DivRemMinusOneSlowPathX86_64 : public SlowPathCode {
127 public:
128 DivRemMinusOneSlowPathX86_64(HInstruction* at, Register reg, DataType::Type type, bool is_div)
129 : SlowPathCode(at), cpu_reg_(CpuRegister(reg)), type_(type), is_div_(is_div) {}
130
131 void EmitNativeCode(CodeGenerator* codegen) override {
132 __ Bind(GetEntryLabel());
133 if (type_ == DataType::Type::kInt32) {
134 if (is_div_) {
135 __ negl(cpu_reg_);
136 } else {
137 __ xorl(cpu_reg_, cpu_reg_);
138 }
139
140 } else {
141 DCHECK_EQ(DataType::Type::kInt64, type_);
142 if (is_div_) {
143 __ negq(cpu_reg_);
144 } else {
145 __ xorl(cpu_reg_, cpu_reg_);
146 }
147 }
148 __ jmp(GetExitLabel());
149 }
150
151 const char* GetDescription() const override { return "DivRemMinusOneSlowPathX86_64"; }
152
153 private:
154 const CpuRegister cpu_reg_;
155 const DataType::Type type_;
156 const bool is_div_;
157 DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86_64);
158 };
159
160 class SuspendCheckSlowPathX86_64 : public SlowPathCode {
161 public:
162 SuspendCheckSlowPathX86_64(HSuspendCheck* instruction, HBasicBlock* successor)
163 : SlowPathCode(instruction), successor_(successor) {}
164
165 void EmitNativeCode(CodeGenerator* codegen) override {
166 LocationSummary* locations = instruction_->GetLocations();
167 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
168 __ Bind(GetEntryLabel());
169 SaveLiveRegisters(codegen, locations); // Only saves full width XMM for SIMD.
170 x86_64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
171 CheckEntrypointTypes<kQuickTestSuspend, void, void>();
172 RestoreLiveRegisters(codegen, locations); // Only restores full width XMM for SIMD.
173 if (successor_ == nullptr) {
174 __ jmp(GetReturnLabel());
175 } else {
176 __ jmp(x86_64_codegen->GetLabelOf(successor_));
177 }
178 }
179
180 Label* GetReturnLabel() {
181 DCHECK(successor_ == nullptr);
182 return &return_label_;
183 }
184
185 HBasicBlock* GetSuccessor() const {
186 return successor_;
187 }
188
189 const char* GetDescription() const override { return "SuspendCheckSlowPathX86_64"; }
190
191 private:
192 HBasicBlock* const successor_;
193 Label return_label_;
194
195 DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86_64);
196 };
197
198 class BoundsCheckSlowPathX86_64 : public SlowPathCode {
199 public:
200 explicit BoundsCheckSlowPathX86_64(HBoundsCheck* instruction)
201 : SlowPathCode(instruction) {}
202
203 void EmitNativeCode(CodeGenerator* codegen) override {
204 LocationSummary* locations = instruction_->GetLocations();
205 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
206 __ Bind(GetEntryLabel());
207 if (instruction_->CanThrowIntoCatchBlock()) {
208 // Live registers will be restored in the catch block if caught.
209 SaveLiveRegisters(codegen, locations);
210 }
211
212 Location index_loc = locations->InAt(0);
213 Location length_loc = locations->InAt(1);
214 InvokeRuntimeCallingConvention calling_convention;
215 Location index_arg = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
216 Location length_arg = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
217
218 // Are we using an array length from memory?
219 if (!length_loc.IsValid()) {
220 DCHECK(instruction_->InputAt(1)->IsArrayLength());
221 HArrayLength* array_length = instruction_->InputAt(1)->AsArrayLength();
222 DCHECK(array_length->IsEmittedAtUseSite());
223 uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length);
224 Location array_loc = array_length->GetLocations()->InAt(0);
225 Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
226 if (!index_loc.Equals(length_arg)) {
227 // The index is not clobbered by loading the length directly to `length_arg`.
228 __ movl(length_arg.AsRegister<CpuRegister>(), array_len);
229 x86_64_codegen->Move(index_arg, index_loc);
230 } else if (!array_loc.Equals(index_arg)) {
231 // The array reference is not clobbered by the index move.
232 x86_64_codegen->Move(index_arg, index_loc);
233 __ movl(length_arg.AsRegister<CpuRegister>(), array_len);
234 } else {
235 // Load the array length into `TMP`.
236 DCHECK(codegen->IsBlockedCoreRegister(TMP));
237 __ movl(CpuRegister(TMP), array_len);
238 // Single move to CPU register does not clobber `TMP`.
239 x86_64_codegen->Move(index_arg, index_loc);
240 __ movl(length_arg.AsRegister<CpuRegister>(), CpuRegister(TMP));
241 }
242 if (mirror::kUseStringCompression && array_length->IsStringLength()) {
243 __ shrl(length_arg.AsRegister<CpuRegister>(), Immediate(1));
244 }
245 } else {
246 // We're moving two locations to locations that could overlap,
247 // so we need a parallel move resolver.
248 codegen->EmitParallelMoves(
249 index_loc,
250 index_arg,
251 DataType::Type::kInt32,
252 length_loc,
253 length_arg,
254 DataType::Type::kInt32);
255 }
256
257 QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
258 ? kQuickThrowStringBounds
259 : kQuickThrowArrayBounds;
260 x86_64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
261 CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
262 CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
263 }
264
265 bool IsFatal() const override { return true; }
266
267 const char* GetDescription() const override { return "BoundsCheckSlowPathX86_64"; }
268
269 private:
270 DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86_64);
271 };
272
273 class LoadMethodTypeSlowPathX86_64: public SlowPathCode {
274 public:
275 explicit LoadMethodTypeSlowPathX86_64(HLoadMethodType* mt) : SlowPathCode(mt) {}
276
277 void EmitNativeCode(CodeGenerator* codegen) override {
278 LocationSummary* locations = instruction_->GetLocations();
279 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
280
281 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
282 __ Bind(GetEntryLabel());
283 SaveLiveRegisters(codegen, locations);
284
285 const dex::ProtoIndex proto_index = instruction_->AsLoadMethodType()->GetProtoIndex();
286 // Custom calling convention: RAX serves as both input and output.
287 __ movl(CpuRegister(RAX), Immediate(proto_index.index_));
288 x86_64_codegen->InvokeRuntime(kQuickResolveMethodType,
289 instruction_,
290 instruction_->GetDexPc(),
291 this);
292 CheckEntrypointTypes<kQuickResolveMethodType, void*, uint32_t>();
293 x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
294 RestoreLiveRegisters(codegen, locations);
295
296 __ jmp(GetExitLabel());
297 }
298
299 const char* GetDescription() const override { return "LoadMethodTypeSlowPathX86_64"; }
300
301 private:
302 DISALLOW_COPY_AND_ASSIGN(LoadMethodTypeSlowPathX86_64);
303 };
304
305 class LoadClassSlowPathX86_64 : public SlowPathCode {
306 public:
307 LoadClassSlowPathX86_64(HLoadClass* cls, HInstruction* at)
308 : SlowPathCode(at), cls_(cls) {
309 DCHECK(at->IsLoadClass() || at->IsClinitCheck());
310 DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
311 }
312
313 void EmitNativeCode(CodeGenerator* codegen) override {
314 LocationSummary* locations = instruction_->GetLocations();
315 Location out = locations->Out();
316 const uint32_t dex_pc = instruction_->GetDexPc();
317 bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
318 bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();
319
320 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
321 __ Bind(GetEntryLabel());
322 SaveLiveRegisters(codegen, locations);
323
324 // Custom calling convention: RAX serves as both input and output.
325 if (must_resolve_type) {
326 DCHECK(IsSameDexFile(cls_->GetDexFile(), x86_64_codegen->GetGraph()->GetDexFile()) ||
327 x86_64_codegen->GetCompilerOptions().WithinOatFile(&cls_->GetDexFile()) ||
328 ContainsElement(Runtime::Current()->GetClassLinker()->GetBootClassPath(),
329 &cls_->GetDexFile()));
330 dex::TypeIndex type_index = cls_->GetTypeIndex();
331 __ movl(CpuRegister(RAX), Immediate(type_index.index_));
332 if (cls_->NeedsAccessCheck()) {
333 CheckEntrypointTypes<kQuickResolveTypeAndVerifyAccess, void*, uint32_t>();
334 x86_64_codegen->InvokeRuntime(kQuickResolveTypeAndVerifyAccess, instruction_, dex_pc, this);
335 } else {
336 CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
337 x86_64_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
338 }
339 // If we also must_do_clinit, the resolved type is now in the correct register.
340 } else {
341 DCHECK(must_do_clinit);
342 Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
343 x86_64_codegen->Move(Location::RegisterLocation(RAX), source);
344 }
345 if (must_do_clinit) {
346 x86_64_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
347 CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
348 }
349
350 // Move the class to the desired location.
351 if (out.IsValid()) {
352 DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
353 x86_64_codegen->Move(out, Location::RegisterLocation(RAX));
354 }
355
356 RestoreLiveRegisters(codegen, locations);
357 __ jmp(GetExitLabel());
358 }
359
360 const char* GetDescription() const override { return "LoadClassSlowPathX86_64"; }
361
362 private:
363 // The class this slow path will load.
364 HLoadClass* const cls_;
365
366 DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86_64);
367 };
368
369 class LoadStringSlowPathX86_64 : public SlowPathCode {
370 public:
371 explicit LoadStringSlowPathX86_64(HLoadString* instruction) : SlowPathCode(instruction) {}
372
373 void EmitNativeCode(CodeGenerator* codegen) override {
374 LocationSummary* locations = instruction_->GetLocations();
375 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
376
377 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
378 __ Bind(GetEntryLabel());
379 SaveLiveRegisters(codegen, locations);
380
381 const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
382 // Custom calling convention: RAX serves as both input and output.
383 __ movl(CpuRegister(RAX), Immediate(string_index.index_));
384 x86_64_codegen->InvokeRuntime(kQuickResolveString,
385 instruction_,
386 instruction_->GetDexPc(),
387 this);
388 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
389 x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
390 RestoreLiveRegisters(codegen, locations);
391
392 __ jmp(GetExitLabel());
393 }
394
395 const char* GetDescription() const override { return "LoadStringSlowPathX86_64"; }
396
397 private:
398 DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86_64);
399 };
400
401 class TypeCheckSlowPathX86_64 : public SlowPathCode {
402 public:
403 TypeCheckSlowPathX86_64(HInstruction* instruction, bool is_fatal)
404 : SlowPathCode(instruction), is_fatal_(is_fatal) {}
405
406 void EmitNativeCode(CodeGenerator* codegen) override {
407 LocationSummary* locations = instruction_->GetLocations();
408 uint32_t dex_pc = instruction_->GetDexPc();
409 DCHECK(instruction_->IsCheckCast()
410 || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
411
412 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
413 __ Bind(GetEntryLabel());
414
415 if (kPoisonHeapReferences &&
416 instruction_->IsCheckCast() &&
417 instruction_->AsCheckCast()->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) {
418 // First, unpoison the `cls` reference that was poisoned for direct memory comparison.
419 __ UnpoisonHeapReference(locations->InAt(1).AsRegister<CpuRegister>());
420 }
421
422 if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
423 SaveLiveRegisters(codegen, locations);
424 }
425
426 // We're moving two locations to locations that could overlap, so we need a parallel
427 // move resolver.
428 InvokeRuntimeCallingConvention calling_convention;
429 codegen->EmitParallelMoves(locations->InAt(0),
430 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
431 DataType::Type::kReference,
432 locations->InAt(1),
433 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
434 DataType::Type::kReference);
435 if (instruction_->IsInstanceOf()) {
436 x86_64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
437 CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
438 } else {
439 DCHECK(instruction_->IsCheckCast());
440 x86_64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
441 CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
442 }
443
444 if (!is_fatal_) {
445 if (instruction_->IsInstanceOf()) {
446 x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
447 }
448
449 RestoreLiveRegisters(codegen, locations);
450 __ jmp(GetExitLabel());
451 }
452 }
453
454 const char* GetDescription() const override { return "TypeCheckSlowPathX86_64"; }
455
456 bool IsFatal() const override { return is_fatal_; }
457
458 private:
459 const bool is_fatal_;
460
461 DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86_64);
462 };
463
464 class DeoptimizationSlowPathX86_64 : public SlowPathCode {
465 public:
466 explicit DeoptimizationSlowPathX86_64(HDeoptimize* instruction)
467 : SlowPathCode(instruction) {}
468
469 void EmitNativeCode(CodeGenerator* codegen) override {
470 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
471 __ Bind(GetEntryLabel());
472 LocationSummary* locations = instruction_->GetLocations();
473 SaveLiveRegisters(codegen, locations);
474 InvokeRuntimeCallingConvention calling_convention;
475 x86_64_codegen->Load32BitValue(
476 CpuRegister(calling_convention.GetRegisterAt(0)),
477 static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
478 x86_64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
479 CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
480 }
481
482 const char* GetDescription() const override { return "DeoptimizationSlowPathX86_64"; }
483
484 private:
485 DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64);
486 };
487
488 class ArraySetSlowPathX86_64 : public SlowPathCode {
489 public:
490 explicit ArraySetSlowPathX86_64(HInstruction* instruction) : SlowPathCode(instruction) {}
491
492 void EmitNativeCode(CodeGenerator* codegen) override {
493 LocationSummary* locations = instruction_->GetLocations();
494 __ Bind(GetEntryLabel());
495 SaveLiveRegisters(codegen, locations);
496
497 InvokeRuntimeCallingConvention calling_convention;
498 HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
499 parallel_move.AddMove(
500 locations->InAt(0),
501 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
502 DataType::Type::kReference,
503 nullptr);
504 parallel_move.AddMove(
505 locations->InAt(1),
506 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
507 DataType::Type::kInt32,
508 nullptr);
509 parallel_move.AddMove(
510 locations->InAt(2),
511 Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
512 DataType::Type::kReference,
513 nullptr);
514 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
515
516 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
517 x86_64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
518 CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
519 RestoreLiveRegisters(codegen, locations);
520 __ jmp(GetExitLabel());
521 }
522
523 const char* GetDescription() const override { return "ArraySetSlowPathX86_64"; }
524
525 private:
526 DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64);
527 };
528
529 // Slow path marking an object reference `ref` during a read
530 // barrier. The field `obj.field` in the object `obj` holding this
531 // reference does not get updated by this slow path after marking (see
532 // ReadBarrierMarkAndUpdateFieldSlowPathX86_64 below for that).
533 //
534 // This means that after the execution of this slow path, `ref` will
535 // always be up-to-date, but `obj.field` may not; i.e., after the
536 // flip, `ref` will be a to-space reference, but `obj.field` will
537 // probably still be a from-space reference (unless it gets updated by
538 // another thread, or if another thread installed another object
539 // reference (different from `ref`) in `obj.field`).
540 class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
541 public:
542 ReadBarrierMarkSlowPathX86_64(HInstruction* instruction,
543 Location ref,
544 bool unpoison_ref_before_marking)
545 : SlowPathCode(instruction),
546 ref_(ref),
547 unpoison_ref_before_marking_(unpoison_ref_before_marking) {
548 }
549
550 const char* GetDescription() const override { return "ReadBarrierMarkSlowPathX86_64"; }
551
552 void EmitNativeCode(CodeGenerator* codegen) override {
553 DCHECK(codegen->EmitReadBarrier());
554 LocationSummary* locations = instruction_->GetLocations();
555 CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
556 Register ref_reg = ref_cpu_reg.AsRegister();
557 DCHECK(locations->CanCall());
558 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
559 DCHECK(instruction_->IsInstanceFieldGet() ||
560 instruction_->IsStaticFieldGet() ||
561 instruction_->IsArrayGet() ||
562 instruction_->IsArraySet() ||
563 instruction_->IsLoadClass() ||
564 instruction_->IsLoadMethodType() ||
565 instruction_->IsLoadString() ||
566 instruction_->IsInstanceOf() ||
567 instruction_->IsCheckCast() ||
568 (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
569 << "Unexpected instruction in read barrier marking slow path: "
570 << instruction_->DebugName();
571
572 __ Bind(GetEntryLabel());
573 if (unpoison_ref_before_marking_) {
574 // Object* ref = ref_addr->AsMirrorPtr()
575 __ MaybeUnpoisonHeapReference(ref_cpu_reg);
576 }
577 // No need to save live registers; it's taken care of by the
578 // entrypoint. Also, there is no need to update the stack mask,
579 // as this runtime call will not trigger a garbage collection.
580 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
581 DCHECK_NE(ref_reg, RSP);
582 DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
583 // "Compact" slow path, saving two moves.
584 //
585 // Instead of using the standard runtime calling convention (input
586 // and output in R0):
587 //
588 // RDI <- ref
589 // RAX <- ReadBarrierMark(RDI)
590 // ref <- RAX
591 //
592 // we just use rX (the register containing `ref`) as input and output
593 // of a dedicated entrypoint:
594 //
595 // rX <- ReadBarrierMarkRegX(rX)
596 //
597 int32_t entry_point_offset =
598 Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
599 // This runtime call does not require a stack map.
600 x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
601 __ jmp(GetExitLabel());
602 }
603
604 private:
605 // The location (register) of the marked object reference.
606 const Location ref_;
607 // Should the reference in `ref_` be unpoisoned prior to marking it?
608 const bool unpoison_ref_before_marking_;
609
610 DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64);
611 };
612
613 // Slow path marking an object reference `ref` during a read barrier,
614 // and if needed, atomically updating the field `obj.field` in the
615 // object `obj` holding this reference after marking (contrary to
616 // ReadBarrierMarkSlowPathX86_64 above, which never tries to update
617 // `obj.field`).
618 //
619 // This means that after the execution of this slow path, both `ref`
620 // and `obj.field` will be up-to-date; i.e., after the flip, both will
621 // hold the same to-space reference (unless another thread installed
622 // another object reference (different from `ref`) in `obj.field`).
623 class ReadBarrierMarkAndUpdateFieldSlowPathX86_64 : public SlowPathCode {
624 public:
625 ReadBarrierMarkAndUpdateFieldSlowPathX86_64(HInstruction* instruction,
626 Location ref,
627 CpuRegister obj,
628 const Address& field_addr,
629 bool unpoison_ref_before_marking,
630 CpuRegister temp1,
631 CpuRegister temp2)
632 : SlowPathCode(instruction),
633 ref_(ref),
634 obj_(obj),
635 field_addr_(field_addr),
636 unpoison_ref_before_marking_(unpoison_ref_before_marking),
637 temp1_(temp1),
638 temp2_(temp2) {
639 }
640
641 const char* GetDescription() const override {
642 return "ReadBarrierMarkAndUpdateFieldSlowPathX86_64";
643 }
644
645 void EmitNativeCode(CodeGenerator* codegen) override {
646 DCHECK(codegen->EmitReadBarrier());
647 LocationSummary* locations = instruction_->GetLocations();
648 CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
649 Register ref_reg = ref_cpu_reg.AsRegister();
650 DCHECK(locations->CanCall());
651 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
652 DCHECK((instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
653 << "Unexpected instruction in read barrier marking and field updating slow path: "
654 << instruction_->DebugName();
655 HInvoke* invoke = instruction_->AsInvoke();
656 DCHECK(IsUnsafeCASReference(invoke) ||
657 IsUnsafeGetAndSetReference(invoke) ||
658 IsVarHandleCASFamily(invoke)) << invoke->GetIntrinsic();
659
660 __ Bind(GetEntryLabel());
661 if (unpoison_ref_before_marking_) {
662 // Object* ref = ref_addr->AsMirrorPtr()
663 __ MaybeUnpoisonHeapReference(ref_cpu_reg);
664 }
665
666 // Save the old (unpoisoned) reference.
667 __ movl(temp1_, ref_cpu_reg);
668
669 // No need to save live registers; it's taken care of by the
670 // entrypoint. Also, there is no need to update the stack mask,
671 // as this runtime call will not trigger a garbage collection.
672 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
673 DCHECK_NE(ref_reg, RSP);
674 DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
675 // "Compact" slow path, saving two moves.
676 //
677 // Instead of using the standard runtime calling convention (input
678 // and output in R0):
679 //
680 // RDI <- ref
681 // RAX <- ReadBarrierMark(RDI)
682 // ref <- RAX
683 //
684 // we just use rX (the register containing `ref`) as input and output
685 // of a dedicated entrypoint:
686 //
687 // rX <- ReadBarrierMarkRegX(rX)
688 //
689 int32_t entry_point_offset =
690 Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
691 // This runtime call does not require a stack map.
692 x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
693
694 // If the new reference is different from the old reference,
695 // update the field in the holder (`*field_addr`).
696 //
697 // Note that this field could also hold a different object, if
698 // another thread had concurrently changed it. In that case, the
699 // LOCK CMPXCHGL instruction in the compare-and-set (CAS)
700 // operation below would abort the CAS, leaving the field as-is.
701 NearLabel done;
702 __ cmpl(temp1_, ref_cpu_reg);
703 __ j(kEqual, &done);
704
705 // Update the holder's field atomically. This may fail if the
706 // mutator updates before us, but it's OK. This is achieved
707 // using a strong compare-and-set (CAS) operation with relaxed
708 // memory synchronization ordering, where the expected value is
709 // the old reference and the desired value is the new reference.
710 // This operation is implemented with a 32-bit LOCK CMPXCHG
711 // instruction, which requires the expected value (the old
712 // reference) to be in EAX. Save RAX beforehand, and move the
713 // expected value (stored in `temp1_`) into EAX.
714 __ movq(temp2_, CpuRegister(RAX));
715 __ movl(CpuRegister(RAX), temp1_);
716
717 // Convenience aliases.
718 CpuRegister base = obj_;
719 CpuRegister expected = CpuRegister(RAX);
720 CpuRegister value = ref_cpu_reg;
721
722 bool base_equals_value = (base.AsRegister() == value.AsRegister());
723 Register value_reg = ref_reg;
724 if (kPoisonHeapReferences) {
725 if (base_equals_value) {
726 // If `base` and `value` are the same register location, move
727 // `value_reg` to a temporary register. This way, poisoning
728 // `value_reg` won't invalidate `base`.
729 value_reg = temp1_.AsRegister();
730 __ movl(CpuRegister(value_reg), base);
731 }
732
733 // Check that the register allocator did not assign the location
734 // of `expected` (RAX) to `value` nor to `base`, so that heap
735 // poisoning (when enabled) works as intended below.
736 // - If `value` were equal to `expected`, both references would
737 // be poisoned twice, meaning they would not be poisoned at
738 // all, as heap poisoning uses address negation.
739 // - If `base` were equal to `expected`, poisoning `expected`
740 // would invalidate `base`.
741 DCHECK_NE(value_reg, expected.AsRegister());
742 DCHECK_NE(base.AsRegister(), expected.AsRegister());
743
744 __ PoisonHeapReference(expected);
745 __ PoisonHeapReference(CpuRegister(value_reg));
746 }
747
748 __ LockCmpxchgl(field_addr_, CpuRegister(value_reg));
749
750 // If heap poisoning is enabled, we need to unpoison the values
751 // that were poisoned earlier.
752 if (kPoisonHeapReferences) {
753 if (base_equals_value) {
754 // `value_reg` has been moved to a temporary register, no need
755 // to unpoison it.
756 } else {
757 __ UnpoisonHeapReference(CpuRegister(value_reg));
758 }
759 // No need to unpoison `expected` (RAX), as it will be overwritten below.
760 }
761
762 // Restore RAX.
763 __ movq(CpuRegister(RAX), temp2_);
764
765 __ Bind(&done);
766 __ jmp(GetExitLabel());
767 }
768
769 private:
770 // The location (register) of the marked object reference.
771 const Location ref_;
772 // The register containing the object holding the marked object reference field.
773 const CpuRegister obj_;
774 // The address of the marked reference field. The base of this address must be `obj_`.
775 const Address field_addr_;
776
777 // Should the reference in `ref_` be unpoisoned prior to marking it?
778 const bool unpoison_ref_before_marking_;
779
780 const CpuRegister temp1_;
781 const CpuRegister temp2_;
782
783 DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86_64);
784 };
785
786 // Slow path generating a read barrier for a heap reference.
787 class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
788 public:
789 ReadBarrierForHeapReferenceSlowPathX86_64(HInstruction* instruction,
790 Location out,
791 Location ref,
792 Location obj,
793 uint32_t offset,
794 Location index)
795 : SlowPathCode(instruction),
796 out_(out),
797 ref_(ref),
798 obj_(obj),
799 offset_(offset),
800 index_(index) {
801 // If `obj` is equal to `out` or `ref`, it means the initial
802 // object has been overwritten by (or after) the heap object
803 // reference load to be instrumented, e.g.:
804 //
805 // __ movl(out, Address(out, offset));
806 // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
807 //
808 // In that case, we have lost the information about the original
809 // object, and the emitted read barrier cannot work properly.
810 DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
811 DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
812 }
813
814 void EmitNativeCode(CodeGenerator* codegen) override {
815 DCHECK(codegen->EmitReadBarrier());
816 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
817 LocationSummary* locations = instruction_->GetLocations();
818 CpuRegister reg_out = out_.AsRegister<CpuRegister>();
819 DCHECK(locations->CanCall());
820 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_;
821 DCHECK(instruction_->IsInstanceFieldGet() ||
822 instruction_->IsStaticFieldGet() ||
823 instruction_->IsArrayGet() ||
824 instruction_->IsInstanceOf() ||
825 instruction_->IsCheckCast() ||
826 (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
827 << "Unexpected instruction in read barrier for heap reference slow path: "
828 << instruction_->DebugName();
829
830 __ Bind(GetEntryLabel());
831 SaveLiveRegisters(codegen, locations);
832
833 // We may have to change the index's value, but as `index_` is a
834 // constant member (like other "inputs" of this slow path),
835 // introduce a copy of it, `index`.
836 Location index = index_;
837 if (index_.IsValid()) {
838 // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
839 if (instruction_->IsArrayGet()) {
840 // Compute real offset and store it in index_.
841 Register index_reg = index_.AsRegister<CpuRegister>().AsRegister();
842 DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
843 if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
844 // We are about to change the value of `index_reg` (see the
845 // calls to art::x86_64::X86_64Assembler::shll and
846 // art::x86_64::X86_64Assembler::AddImmediate below), but it
847 // has not been saved by the previous call to
848 // art::SlowPathCode::SaveLiveRegisters, as it is a
849 // callee-save register --
850 // art::SlowPathCode::SaveLiveRegisters does not consider
851 // callee-save registers, as it has been designed with the
852 // assumption that callee-save registers are supposed to be
853 // handled by the called function. So, as a callee-save
854 // register, `index_reg` _would_ eventually be saved onto
855 // the stack, but it would be too late: we would have
856 // changed its value earlier. Therefore, we manually save
857 // it here into another freely available register,
858 // `free_reg`, chosen of course among the caller-save
859 // registers (as a callee-save `free_reg` register would
860 // exhibit the same problem).
861 //
862 // Note we could have requested a temporary register from
863 // the register allocator instead; but we prefer not to, as
864 // this is a slow path, and we know we can find a
865 // caller-save register that is available.
866 Register free_reg = FindAvailableCallerSaveRegister(codegen).AsRegister();
867 __ movl(CpuRegister(free_reg), CpuRegister(index_reg));
868 index_reg = free_reg;
869 index = Location::RegisterLocation(index_reg);
870 } else {
871 // The initial register stored in `index_` has already been
872 // saved in the call to art::SlowPathCode::SaveLiveRegisters
873 // (as it is not a callee-save register), so we can freely
874 // use it.
875 }
876 // Shifting the index value contained in `index_reg` by the
877 // scale factor (2) cannot overflow in practice, as the
878 // runtime is unable to allocate object arrays with a size
879 // larger than 2^26 - 1 (that is, 2^28 - 4 bytes).
880 __ shll(CpuRegister(index_reg), Immediate(TIMES_4));
881 static_assert(
882 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
883 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
884 __ AddImmediate(CpuRegister(index_reg), Immediate(offset_));
885 } else {
886 // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
887 // intrinsics, `index_` is not shifted by a scale factor of 2
888 // (as in the case of ArrayGet), as it is actually an offset
889 // to an object field within an object.
890 DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
891 DCHECK(instruction_->GetLocations()->Intrinsified());
892 DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
893 (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile) ||
894 (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetReference) ||
895 (instruction_->AsInvoke()->GetIntrinsic() ==
896 Intrinsics::kJdkUnsafeGetReferenceVolatile) ||
897 (instruction_->AsInvoke()->GetIntrinsic() ==
898 Intrinsics::kJdkUnsafeGetReferenceAcquire))
899 << instruction_->AsInvoke()->GetIntrinsic();
900 DCHECK_EQ(offset_, 0U);
901 DCHECK(index_.IsRegister());
902 }
903 }
904
905 // We're moving two or three locations to locations that could
906 // overlap, so we need a parallel move resolver.
907 InvokeRuntimeCallingConvention calling_convention;
908 HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
909 parallel_move.AddMove(ref_,
910 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
911 DataType::Type::kReference,
912 nullptr);
913 parallel_move.AddMove(obj_,
914 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
915 DataType::Type::kReference,
916 nullptr);
917 if (index.IsValid()) {
918 parallel_move.AddMove(index,
919 Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
920 DataType::Type::kInt32,
921 nullptr);
922 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
923 } else {
924 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
925 __ movl(CpuRegister(calling_convention.GetRegisterAt(2)), Immediate(offset_));
926 }
927 x86_64_codegen->InvokeRuntime(kQuickReadBarrierSlow,
928 instruction_,
929 instruction_->GetDexPc(),
930 this);
931 CheckEntrypointTypes<
932 kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
933 x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
934
935 RestoreLiveRegisters(codegen, locations);
936 __ jmp(GetExitLabel());
937 }
938
939 const char* GetDescription() const override {
940 return "ReadBarrierForHeapReferenceSlowPathX86_64";
941 }
942
943 private:
944 CpuRegister FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
945 size_t ref = static_cast<int>(ref_.AsRegister<CpuRegister>().AsRegister());
946 size_t obj = static_cast<int>(obj_.AsRegister<CpuRegister>().AsRegister());
947 for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
948 if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
949 return static_cast<CpuRegister>(i);
950 }
951 }
952 // We shall never fail to find a free caller-save register, as
953 // there are more than two core caller-save registers on x86-64
954 // (meaning it is possible to find one which is different from
955 // `ref` and `obj`).
956 DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
957 LOG(FATAL) << "Could not find a free caller-save register";
958 UNREACHABLE();
959 }
960
961 const Location out_;
962 const Location ref_;
963 const Location obj_;
964 const uint32_t offset_;
965 // An additional location containing an index to an array.
966 // Only used for HArrayGet and the UnsafeGetObject &
967 // UnsafeGetObjectVolatile intrinsics.
968 const Location index_;
969
970 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86_64);
971 };
972
973 // Slow path generating a read barrier for a GC root.
974 class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode {
975 public:
976 ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root)
977 : SlowPathCode(instruction), out_(out), root_(root) {
978 }
979
980 void EmitNativeCode(CodeGenerator* codegen) override {
981 DCHECK(codegen->EmitReadBarrier());
982 LocationSummary* locations = instruction_->GetLocations();
983 DCHECK(locations->CanCall());
984 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
985 DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
986 << "Unexpected instruction in read barrier for GC root slow path: "
987 << instruction_->DebugName();
988
989 __ Bind(GetEntryLabel());
990 SaveLiveRegisters(codegen, locations);
991
992 InvokeRuntimeCallingConvention calling_convention;
993 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
994 x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
995 x86_64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
996 instruction_,
997 instruction_->GetDexPc(),
998 this);
999 CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
1000 x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
1001
1002 RestoreLiveRegisters(codegen, locations);
1003 __ jmp(GetExitLabel());
1004 }
1005
1006 const char* GetDescription() const override { return "ReadBarrierForRootSlowPathX86_64"; }
1007
1008 private:
1009 const Location out_;
1010 const Location root_;
1011
1012 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86_64);
1013 };
1014
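// Slow path calling the kQuickMethodEntryHook or kQuickMethodExitHook entrypoint
// for method entry/exit instrumentation.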
1015 class MethodEntryExitHooksSlowPathX86_64 : public SlowPathCode {
1016 public:
1017 explicit MethodEntryExitHooksSlowPathX86_64(HInstruction* instruction)
1018 : SlowPathCode(instruction) {}
1019
1020 void EmitNativeCode(CodeGenerator* codegen) override {
1021 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
1022 LocationSummary* locations = instruction_->GetLocations();
1023 QuickEntrypointEnum entry_point =
1024 (instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook;
1025 __ Bind(GetEntryLabel());
1026 SaveLiveRegisters(codegen, locations);
1027 if (instruction_->IsMethodExitHook()) {
1028 // Load FrameSize to pass to the exit hook.
1029 __ movq(CpuRegister(R8), Immediate(codegen->GetFrameSize()));
1030 }
1031 x86_64_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this);
1032 RestoreLiveRegisters(codegen, locations);
1033 __ jmp(GetExitLabel());
1034 }
1035
1036 const char* GetDescription() const override {
1037 return "MethodEntryExitHooksSlowPath";
1038 }
1039
1040 private:
1041 DISALLOW_COPY_AND_ASSIGN(MethodEntryExitHooksSlowPathX86_64);
1042 };
1043
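// Slow path that resets the profiling counter to the optimize threshold and then
// calls the kQuickCompileOptimized entrypoint.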
1044 class CompileOptimizedSlowPathX86_64 : public SlowPathCode {
1045 public:
1046 CompileOptimizedSlowPathX86_64(HSuspendCheck* suspend_check, uint64_t counter_address)
1047 : SlowPathCode(suspend_check),
1048 counter_address_(counter_address) {}
1049
1050 void EmitNativeCode(CodeGenerator* codegen) override {
1051 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
1052 __ Bind(GetEntryLabel());
1053 __ movq(CpuRegister(TMP), Immediate(counter_address_));
1054 __ movw(Address(CpuRegister(TMP), 0), Immediate(ProfilingInfo::GetOptimizeThreshold()));
1055 if (instruction_ != nullptr) {
1056 // Only saves full width XMM for SIMD.
1057 SaveLiveRegisters(codegen, instruction_->GetLocations());
1058 }
1059 x86_64_codegen->GenerateInvokeRuntime(
1060 GetThreadOffset<kX86_64PointerSize>(kQuickCompileOptimized).Int32Value());
1061 if (instruction_ != nullptr) {
1062 // Only restores full width XMM for SIMD.
1063 RestoreLiveRegisters(codegen, instruction_->GetLocations());
1064 }
1065 __ jmp(GetExitLabel());
1066 }
1067
1068 const char* GetDescription() const override {
1069 return "CompileOptimizedSlowPath";
1070 }
1071
1072 private:
1073 uint64_t counter_address_;
1074
1075 DISALLOW_COPY_AND_ASSIGN(CompileOptimizedSlowPathX86_64);
1076 };
1077
1078 #undef __
1079 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
1080 #define __ down_cast<X86_64Assembler*>(GetAssembler())-> // NOLINT
1081
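// Maps integer condition to x86_64 name.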
1082 inline Condition X86_64IntegerCondition(IfCondition cond) {
1083 switch (cond) {
1084 case kCondEQ: return kEqual;
1085 case kCondNE: return kNotEqual;
1086 case kCondLT: return kLess;
1087 case kCondLE: return kLessEqual;
1088 case kCondGT: return kGreater;
1089 case kCondGE: return kGreaterEqual;
1090 case kCondB: return kBelow;
1091 case kCondBE: return kBelowEqual;
1092 case kCondA: return kAbove;
1093 case kCondAE: return kAboveEqual;
1094 }
1095 LOG(FATAL) << "Unreachable";
1096 UNREACHABLE();
1097 }
1098
1099 // Maps FP condition to x86_64 name.
1100 inline Condition X86_64FPCondition(IfCondition cond) {
1101 switch (cond) {
1102 case kCondEQ: return kEqual;
1103 case kCondNE: return kNotEqual;
1104 case kCondLT: return kBelow;
1105 case kCondLE: return kBelowEqual;
1106 case kCondGT: return kAbove;
1107 case kCondGE: return kAboveEqual;
1108 default: break; // should not happen
1109 }
1110 LOG(FATAL) << "Unreachable";
1111 UNREACHABLE();
1112 }
1113
1114 void CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(LocationSummary* locations) {
1115 // We have to ensure that the native code we call directly (such as @CriticalNative
1116 // or some intrinsic helpers, say Math.sin()) doesn't clobber the XMM registers
1117 // which are non-volatile for ART, but volatile for Native calls. This will ensure
1118 // that they are saved in the prologue and properly restored.
1119 for (FloatRegister fp_reg : non_volatile_xmm_regs) {
1120 locations->AddTemp(Location::FpuRegisterLocation(fp_reg));
1121 }
1122 }
1123
1124 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStaticOrDirectDispatch(
1125 const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
1126 [[maybe_unused]] ArtMethod* method) {
1127 return desired_dispatch_info;
1128 }
1129
1130 void CodeGeneratorX86_64::LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke) {
1131 switch (load_kind) {
1132 case MethodLoadKind::kBootImageLinkTimePcRelative:
1133 DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
1134 __ leal(temp.AsRegister<CpuRegister>(),
1135 Address::Absolute(kPlaceholder32BitOffset, /* no_rip= */ false));
1136 RecordBootImageMethodPatch(invoke);
1137 break;
1138 case MethodLoadKind::kBootImageRelRo: {
1139 // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load.
1140 __ movl(temp.AsRegister<CpuRegister>(),
1141 Address::Absolute(kPlaceholder32BitOffset, /* no_rip= */ false));
1142 RecordBootImageRelRoPatch(GetBootImageOffset(invoke));
1143 break;
1144 }
1145 case MethodLoadKind::kBssEntry: {
1146 __ movq(temp.AsRegister<CpuRegister>(),
1147 Address::Absolute(kPlaceholder32BitOffset, /* no_rip= */ false));
1148 RecordMethodBssEntryPatch(invoke);
1149 // No need for memory fence, thanks to the x86-64 memory model.
1150 break;
1151 }
1152 case MethodLoadKind::kJitDirectAddress: {
1153 Load64BitValue(temp.AsRegister<CpuRegister>(),
1154 reinterpret_cast<int64_t>(invoke->GetResolvedMethod()));
1155 break;
1156 }
1157 case MethodLoadKind::kRuntimeCall: {
1158 // Test situation, don't do anything.
1159 break;
1160 }
1161 default: {
1162 LOG(FATAL) << "Load kind should have already been handled " << load_kind;
1163 UNREACHABLE();
1164 }
1165 }
1166 }
1167
1168 void CodeGeneratorX86_64::GenerateStaticOrDirectCall(
1169 HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
1170 // All registers are assumed to be correctly set up.
1171
1172 Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp.
1173 switch (invoke->GetMethodLoadKind()) {
1174 case MethodLoadKind::kStringInit: {
1175 // temp = thread->string_init_entrypoint
1176 uint32_t offset =
1177 GetThreadOffset<kX86_64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
1178 __ gs()->movq(temp.AsRegister<CpuRegister>(), Address::Absolute(offset, /* no_rip= */ true));
1179 break;
1180 }
1181 case MethodLoadKind::kRecursive: {
1182 callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex());
1183 break;
1184 }
1185 case MethodLoadKind::kRuntimeCall: {
1186 GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
1187 return; // No code pointer retrieval; the runtime performs the call directly.
1188 }
1189 case MethodLoadKind::kBootImageLinkTimePcRelative:
1190 // For kCallCriticalNative we skip loading the method and do the call directly.
1191 if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
1192 break;
1193 }
1194 FALLTHROUGH_INTENDED;
1195 default: {
1196 LoadMethod(invoke->GetMethodLoadKind(), temp, invoke);
1197 break;
1198 }
1199 }
1200
1201 switch (invoke->GetCodePtrLocation()) {
1202 case CodePtrLocation::kCallSelf:
1203 DCHECK(!GetGraph()->HasShouldDeoptimizeFlag());
1204 __ call(&frame_entry_label_);
1205 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
1206 break;
1207 case CodePtrLocation::kCallCriticalNative: {
1208 size_t out_frame_size =
1209 PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorX86_64,
1210 kNativeStackAlignment,
1211 GetCriticalNativeDirectCallFrameSize>(invoke);
1212 if (invoke->GetMethodLoadKind() == MethodLoadKind::kBootImageLinkTimePcRelative) {
1213 DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
1214 __ call(Address::Absolute(kPlaceholder32BitOffset, /* no_rip= */ false));
1215 RecordBootImageJniEntrypointPatch(invoke);
1216 } else {
1217 // (callee_method + offset_of_jni_entry_point)()
1218 __ call(Address(callee_method.AsRegister<CpuRegister>(),
1219 ArtMethod::EntryPointFromJniOffset(kX86_64PointerSize).SizeValue()));
1220 }
1221 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
1222 // Zero-/sign-extend the result when needed due to native and managed ABI mismatch.
1223 switch (invoke->GetType()) {
1224 case DataType::Type::kBool:
1225 __ movzxb(CpuRegister(RAX), CpuRegister(RAX));
1226 break;
1227 case DataType::Type::kInt8:
1228 __ movsxb(CpuRegister(RAX), CpuRegister(RAX));
1229 break;
1230 case DataType::Type::kUint16:
1231 __ movzxw(CpuRegister(RAX), CpuRegister(RAX));
1232 break;
1233 case DataType::Type::kInt16:
1234 __ movsxw(CpuRegister(RAX), CpuRegister(RAX));
1235 break;
1236 case DataType::Type::kInt32:
1237 case DataType::Type::kInt64:
1238 case DataType::Type::kFloat32:
1239 case DataType::Type::kFloat64:
1240 case DataType::Type::kVoid:
1241 break;
1242 default:
1243 DCHECK(false) << invoke->GetType();
1244 break;
1245 }
1246 if (out_frame_size != 0u) {
1247 DecreaseFrame(out_frame_size);
1248 }
1249 break;
1250 }
1251 case CodePtrLocation::kCallArtMethod:
1252 // (callee_method + offset_of_quick_compiled_code)()
1253 __ call(Address(callee_method.AsRegister<CpuRegister>(),
1254 ArtMethod::EntryPointFromQuickCompiledCodeOffset(
1255 kX86_64PointerSize).SizeValue()));
1256 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
1257 break;
1258 }
1259
1260 DCHECK(!IsLeafMethod());
1261 }
1262
1263 void CodeGeneratorX86_64::GenerateVirtualCall(
1264 HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
1265 CpuRegister temp = temp_in.AsRegister<CpuRegister>();
1266 size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
1267 invoke->GetVTableIndex(), kX86_64PointerSize).SizeValue();
1268
1269 // Use the calling convention instead of the location of the receiver, as
1270 // intrinsics may have put the receiver in a different register. In the intrinsics
1271 // slow path, the arguments have been moved to the right place, so here we are
1272 // guaranteed that the receiver is the first register of the calling convention.
1273 InvokeDexCallingConvention calling_convention;
1274 Register receiver = calling_convention.GetRegisterAt(0);
1275
1276 size_t class_offset = mirror::Object::ClassOffset().SizeValue();
1277 // /* HeapReference<Class> */ temp = receiver->klass_
1278 __ movl(temp, Address(CpuRegister(receiver), class_offset));
1279 MaybeRecordImplicitNullCheck(invoke);
1280 // Instead of simply (possibly) unpoisoning `temp` here, we should
1281 // emit a read barrier for the previous class reference load.
1282 // However this is not required in practice, as this is an
1283 // intermediate/temporary reference and because the current
1284 // concurrent copying collector keeps the from-space memory
1285 // intact/accessible until the end of the marking phase (the
1286 // concurrent copying collector may not do so in the future).
1287 __ MaybeUnpoisonHeapReference(temp);
1288
1289 MaybeGenerateInlineCacheCheck(invoke, temp);
1290
1291 // temp = temp->GetMethodAt(method_offset);
1292 __ movq(temp, Address(temp, method_offset));
1293 // call temp->GetEntryPoint();
1294 __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(
1295 kX86_64PointerSize).SizeValue()));
1296 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
1297 }
1298
1299 void CodeGeneratorX86_64::RecordBootImageIntrinsicPatch(uint32_t intrinsic_data) {
1300 boot_image_other_patches_.emplace_back(/* target_dex_file= */ nullptr, intrinsic_data);
1301 __ Bind(&boot_image_other_patches_.back().label);
1302 }
1303
1304 void CodeGeneratorX86_64::RecordBootImageRelRoPatch(uint32_t boot_image_offset) {
1305 boot_image_other_patches_.emplace_back(/* target_dex_file= */ nullptr, boot_image_offset);
1306 __ Bind(&boot_image_other_patches_.back().label);
1307 }
1308
1309 void CodeGeneratorX86_64::RecordBootImageMethodPatch(HInvoke* invoke) {
1310 boot_image_method_patches_.emplace_back(invoke->GetResolvedMethodReference().dex_file,
1311 invoke->GetResolvedMethodReference().index);
1312 __ Bind(&boot_image_method_patches_.back().label);
1313 }
1314
1315 void CodeGeneratorX86_64::RecordMethodBssEntryPatch(HInvoke* invoke) {
1316 DCHECK(IsSameDexFile(GetGraph()->GetDexFile(), *invoke->GetMethodReference().dex_file) ||
1317 GetCompilerOptions().WithinOatFile(invoke->GetMethodReference().dex_file) ||
1318 ContainsElement(Runtime::Current()->GetClassLinker()->GetBootClassPath(),
1319 invoke->GetMethodReference().dex_file));
1320 method_bss_entry_patches_.emplace_back(invoke->GetMethodReference().dex_file,
1321 invoke->GetMethodReference().index);
1322 __ Bind(&method_bss_entry_patches_.back().label);
1323 }
1324
1325 void CodeGeneratorX86_64::RecordBootImageTypePatch(const DexFile& dex_file,
1326 dex::TypeIndex type_index) {
1327 boot_image_type_patches_.emplace_back(&dex_file, type_index.index_);
1328 __ Bind(&boot_image_type_patches_.back().label);
1329 }
1330
1331 void CodeGeneratorX86_64::RecordAppImageTypePatch(const DexFile& dex_file,
1332 dex::TypeIndex type_index) {
1333 app_image_type_patches_.emplace_back(&dex_file, type_index.index_);
1334 __ Bind(&app_image_type_patches_.back().label);
1335 }
1336
1337 Label* CodeGeneratorX86_64::NewTypeBssEntryPatch(HLoadClass* load_class) {
1338 ArenaDeque<PatchInfo<Label>>* patches = nullptr;
1339 switch (load_class->GetLoadKind()) {
1340 case HLoadClass::LoadKind::kBssEntry:
1341 patches = &type_bss_entry_patches_;
1342 break;
1343 case HLoadClass::LoadKind::kBssEntryPublic:
1344 patches = &public_type_bss_entry_patches_;
1345 break;
1346 case HLoadClass::LoadKind::kBssEntryPackage:
1347 patches = &package_type_bss_entry_patches_;
1348 break;
1349 default:
1350 LOG(FATAL) << "Unexpected load kind: " << load_class->GetLoadKind();
1351 UNREACHABLE();
1352 }
1353 patches->emplace_back(&load_class->GetDexFile(), load_class->GetTypeIndex().index_);
1354 return &patches->back().label;
1355 }
1356
1357 void CodeGeneratorX86_64::RecordBootImageStringPatch(HLoadString* load_string) {
1358 boot_image_string_patches_.emplace_back(
1359 &load_string->GetDexFile(), load_string->GetStringIndex().index_);
1360 __ Bind(&boot_image_string_patches_.back().label);
1361 }
1362
1363 Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) {
1364 string_bss_entry_patches_.emplace_back(
1365 &load_string->GetDexFile(), load_string->GetStringIndex().index_);
1366 return &string_bss_entry_patches_.back().label;
1367 }
1368
1369 Label* CodeGeneratorX86_64::NewMethodTypeBssEntryPatch(HLoadMethodType* load_method_type) {
1370 method_type_bss_entry_patches_.emplace_back(
1371 &load_method_type->GetDexFile(), load_method_type->GetProtoIndex().index_);
1372 return &method_type_bss_entry_patches_.back().label;
1373 }
1374
1375 void CodeGeneratorX86_64::RecordBootImageJniEntrypointPatch(HInvokeStaticOrDirect* invoke) {
1376 boot_image_jni_entrypoint_patches_.emplace_back(invoke->GetResolvedMethodReference().dex_file,
1377 invoke->GetResolvedMethodReference().index);
1378 __ Bind(&boot_image_jni_entrypoint_patches_.back().label);
1379 }
1380
1381 void CodeGeneratorX86_64::LoadBootImageAddress(CpuRegister reg, uint32_t boot_image_reference) {
1382 if (GetCompilerOptions().IsBootImage()) {
1383 __ leal(reg,
1384 Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
1385 RecordBootImageIntrinsicPatch(boot_image_reference);
1386 } else if (GetCompilerOptions().GetCompilePic()) {
1387 __ movl(reg,
1388 Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
1389 RecordBootImageRelRoPatch(boot_image_reference);
1390 } else {
1391 DCHECK(GetCompilerOptions().IsJitCompiler());
1392 gc::Heap* heap = Runtime::Current()->GetHeap();
1393 DCHECK(!heap->GetBootImageSpaces().empty());
1394 const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference;
1395 __ movl(reg, Immediate(dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(address))));
1396 }
1397 }
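// The three branches above correspond to, roughly:
//  - boot image compilation: `leal reg, [rip + placeholder]`, fixed up at link
//    time through the recorded intrinsic patch;
//  - PIC (AOT app) compilation: `movl reg, [rip + placeholder]`, reading the
//    boot image address from the relocation slot recorded as a RelRo patch;
//  - JIT: the boot image is already mapped, so the address is materialized as
//    a 32-bit immediate (checked by the dchecked_integral_cast).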
1398
1399 void CodeGeneratorX86_64::LoadIntrinsicDeclaringClass(CpuRegister reg, HInvoke* invoke) {
1400 DCHECK_NE(invoke->GetIntrinsic(), Intrinsics::kNone);
1401 if (GetCompilerOptions().IsBootImage()) {
1402 // Load the type the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
1403 __ leal(reg,
1404 Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
1405 MethodReference target_method = invoke->GetResolvedMethodReference();
1406 dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
1407 boot_image_type_patches_.emplace_back(target_method.dex_file, type_idx.index_);
1408 __ Bind(&boot_image_type_patches_.back().label);
1409 } else {
1410 uint32_t boot_image_offset = GetBootImageOffsetOfIntrinsicDeclaringClass(invoke);
1411 LoadBootImageAddress(reg, boot_image_offset);
1412 }
1413 }
1414
1415 void CodeGeneratorX86_64::LoadClassRootForIntrinsic(CpuRegister reg, ClassRoot class_root) {
1416 if (GetCompilerOptions().IsBootImage()) {
1417 ScopedObjectAccess soa(Thread::Current());
1418 ObjPtr<mirror::Class> klass = GetClassRoot(class_root);
1419 boot_image_type_patches_.emplace_back(&klass->GetDexFile(), klass->GetDexTypeIndex().index_);
1420 __ Bind(&boot_image_type_patches_.back().label);
1421 } else {
1422 uint32_t boot_image_offset = GetBootImageOffset(class_root);
1423 LoadBootImageAddress(reg, boot_image_offset);
1424 }
1425 }
1426
1427 // The label points to the end of the "movl" (or similar) instruction, but the literal offset
1428 // for the patch needs to point to the embedded constant, which occupies the last 4 bytes.
1429 constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
1430
1431 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
1432 inline void CodeGeneratorX86_64::EmitPcRelativeLinkerPatches(
1433 const ArenaDeque<PatchInfo<Label>>& infos,
1434 ArenaVector<linker::LinkerPatch>* linker_patches) {
1435 for (const PatchInfo<Label>& info : infos) {
1436 uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
1437 linker_patches->push_back(
1438 Factory(literal_offset, info.target_dex_file, info.label.Position(), info.offset_or_index));
1439 }
1440 }
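// Worked example (illustrative): a RIP-relative `movl reg, [rip + disp32]`
// ends with its 4-byte displacement. If the bound label sits at code offset
// 100, the literal_offset handed to the linker patch is 100 - 4 = 96, while
// the label position itself (100) is passed as the PC-relative anchor.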
1441
1442 template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
1443 linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
1444 const DexFile* target_dex_file,
1445 uint32_t pc_insn_offset,
1446 uint32_t boot_image_offset) {
1447 DCHECK(target_dex_file == nullptr); // Unused for these patches, should be null.
1448 return Factory(literal_offset, pc_insn_offset, boot_image_offset);
1449 }
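// NoDexFileAdapter adapts a three-parameter patch factory (which takes no dex
// file) to the four-parameter signature expected by EmitPcRelativeLinkerPatches,
// simply dropping the (null) dex file argument.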
1450
1451 void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
1452 DCHECK(linker_patches->empty());
1453 size_t size =
1454 boot_image_method_patches_.size() +
1455 method_bss_entry_patches_.size() +
1456 boot_image_type_patches_.size() +
1457 app_image_type_patches_.size() +
1458 type_bss_entry_patches_.size() +
1459 public_type_bss_entry_patches_.size() +
1460 package_type_bss_entry_patches_.size() +
1461 boot_image_string_patches_.size() +
1462 string_bss_entry_patches_.size() +
1463 method_type_bss_entry_patches_.size() +
1464 boot_image_jni_entrypoint_patches_.size() +
1465 boot_image_other_patches_.size();
1466 linker_patches->reserve(size);
1467 if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) {
1468 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
1469 boot_image_method_patches_, linker_patches);
1470 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
1471 boot_image_type_patches_, linker_patches);
1472 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
1473 boot_image_string_patches_, linker_patches);
1474 } else {
1475 DCHECK(boot_image_method_patches_.empty());
1476 DCHECK(boot_image_type_patches_.empty());
1477 DCHECK(boot_image_string_patches_.empty());
1478 }
1479 DCHECK_IMPLIES(!GetCompilerOptions().IsAppImage(), app_image_type_patches_.empty());
1480 if (GetCompilerOptions().IsBootImage()) {
1481 EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
1482 boot_image_other_patches_, linker_patches);
1483 } else {
1484 EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::BootImageRelRoPatch>>(
1485 boot_image_other_patches_, linker_patches);
1486 EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeAppImageRelRoPatch>(
1487 app_image_type_patches_, linker_patches);
1488 }
1489 EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
1490 method_bss_entry_patches_, linker_patches);
1491 EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
1492 type_bss_entry_patches_, linker_patches);
1493 EmitPcRelativeLinkerPatches<linker::LinkerPatch::PublicTypeBssEntryPatch>(
1494 public_type_bss_entry_patches_, linker_patches);
1495 EmitPcRelativeLinkerPatches<linker::LinkerPatch::PackageTypeBssEntryPatch>(
1496 package_type_bss_entry_patches_, linker_patches);
1497 EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
1498 string_bss_entry_patches_, linker_patches);
1499 EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodTypeBssEntryPatch>(
1500 method_type_bss_entry_patches_, linker_patches);
1501 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeJniEntrypointPatch>(
1502 boot_image_jni_entrypoint_patches_, linker_patches);
1503 DCHECK_EQ(size, linker_patches->size());
1504 }
1505
1506 void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const {
1507 stream << Register(reg);
1508 }
1509
1510 void CodeGeneratorX86_64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
1511 stream << FloatRegister(reg);
1512 }
1513
1514 const X86_64InstructionSetFeatures& CodeGeneratorX86_64::GetInstructionSetFeatures() const {
1515 return *GetCompilerOptions().GetInstructionSetFeatures()->AsX86_64InstructionSetFeatures();
1516 }
1517
1518 size_t CodeGeneratorX86_64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
1519 __ movq(Address(CpuRegister(RSP), stack_index), CpuRegister(reg_id));
1520 return kX86_64WordSize;
1521 }
1522
1523 size_t CodeGeneratorX86_64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
1524 __ movq(CpuRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1525 return kX86_64WordSize;
1526 }
1527
1528 size_t CodeGeneratorX86_64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1529 if (GetGraph()->HasSIMD()) {
1530 __ movups(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
1531 } else {
1532 __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
1533 }
1534 return GetSlowPathFPWidth();
1535 }
1536
1537 size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1538 if (GetGraph()->HasSIMD()) {
1539 __ movups(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1540 } else {
1541 __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1542 }
1543 return GetSlowPathFPWidth();
1544 }
1545
1546 void CodeGeneratorX86_64::InvokeRuntime(QuickEntrypointEnum entrypoint,
1547 HInstruction* instruction,
1548 uint32_t dex_pc,
1549 SlowPathCode* slow_path) {
1550 ValidateInvokeRuntime(entrypoint, instruction, slow_path);
1551 GenerateInvokeRuntime(GetThreadOffset<kX86_64PointerSize>(entrypoint).Int32Value());
1552 if (EntrypointRequiresStackMap(entrypoint)) {
1553 RecordPcInfo(instruction, dex_pc, slow_path);
1554 }
1555 }
1556
1557 void CodeGeneratorX86_64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
1558 HInstruction* instruction,
1559 SlowPathCode* slow_path) {
1560 ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
1561 GenerateInvokeRuntime(entry_point_offset);
1562 }
1563
1564 void CodeGeneratorX86_64::GenerateInvokeRuntime(int32_t entry_point_offset) {
1565 __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip= */ true));
1566 }
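// Note: on x86-64 ART addresses the current Thread through the GS segment, so
// the call above is effectively `call gs:[entry_point_offset]`, where the
// offset selects the entrypoint slot inside the Thread object.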
1567
1568 namespace detail {
1569
1570 // Mark which intrinsics we don't have handcrafted code for.
1571 template <Intrinsics T>
1572 struct IsUnimplemented {
1573 bool is_unimplemented = false;
1574 };
1575
1576 #define TRUE_OVERRIDE(Name) \
1577 template <> \
1578 struct IsUnimplemented<Intrinsics::k##Name> { \
1579 bool is_unimplemented = true; \
1580 };
1581 UNIMPLEMENTED_INTRINSIC_LIST_X86_64(TRUE_OVERRIDE)
1582 #undef TRUE_OVERRIDE
1583
1584 static constexpr bool kIsIntrinsicUnimplemented[] = {
1585 false, // kNone
1586 #define IS_UNIMPLEMENTED(Intrinsic, ...) \
1587 IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented,
1588 ART_INTRINSICS_LIST(IS_UNIMPLEMENTED)
1589 #undef IS_UNIMPLEMENTED
1590 };
1591
1592 } // namespace detail
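// How the table above is built: IsUnimplemented<> defaults to false,
// TRUE_OVERRIDE specializes it to true for every intrinsic named in
// UNIMPLEMENTED_INTRINSIC_LIST_X86_64, and ART_INTRINSICS_LIST then expands
// those results into kIsIntrinsicUnimplemented, indexed by the Intrinsics enum.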
1593
1594 static constexpr int kNumberOfCpuRegisterPairs = 0;
1595 // Use a fake return address register to mimic Quick.
1596 static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1);
1597 CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
1598 const CompilerOptions& compiler_options,
1599 OptimizingCompilerStats* stats)
1600 : CodeGenerator(graph,
1601 kNumberOfCpuRegisters,
1602 kNumberOfFloatRegisters,
1603 kNumberOfCpuRegisterPairs,
1604 ComputeRegisterMask(kCoreCalleeSaves, arraysize(kCoreCalleeSaves))
1605 | (1 << kFakeReturnRegister),
1606 ComputeRegisterMask(kFpuCalleeSaves, arraysize(kFpuCalleeSaves)),
1607 compiler_options,
1608 stats,
1609 ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)),
1610 block_labels_(nullptr),
1611 location_builder_(graph, this),
1612 instruction_visitor_(graph, this),
1613 move_resolver_(graph->GetAllocator(), this),
1614 assembler_(graph->GetAllocator(),
1615 compiler_options.GetInstructionSetFeatures()->AsX86_64InstructionSetFeatures()),
1616 constant_area_start_(0),
1617 boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1618 method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1619 boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1620 app_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1621 type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1622 public_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1623 package_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1624 boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1625 string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1626 method_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1627 boot_image_jni_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1628 boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1629 jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1630 jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1631 fixups_to_jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
1632 AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
1633 }
1634
1635 InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph,
1636 CodeGeneratorX86_64* codegen)
1637 : InstructionCodeGenerator(graph, codegen),
1638 assembler_(codegen->GetAssembler()),
1639 codegen_(codegen) {}
1640
1641 void CodeGeneratorX86_64::SetupBlockedRegisters() const {
1642 // Stack register is always reserved.
1643 blocked_core_registers_[RSP] = true;
1644
1645 // Block the register used as TMP.
1646 blocked_core_registers_[TMP] = true;
1647 }
1648
1649 static dwarf::Reg DWARFReg(Register reg) {
1650 return dwarf::Reg::X86_64Core(static_cast<int>(reg));
1651 }
1652
1653 static dwarf::Reg DWARFReg(FloatRegister reg) {
1654 return dwarf::Reg::X86_64Fp(static_cast<int>(reg));
1655 }
1656
1657 void LocationsBuilderX86_64::VisitMethodEntryHook(HMethodEntryHook* method_hook) {
1658 LocationSummary* locations = new (GetGraph()->GetAllocator())
1659 LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
1660 // We use rdtsc to record the timestamp for method profiling. rdtsc returns
1661 // two 32-bit values in EAX + EDX even on 64-bit architectures.
1662 locations->AddTemp(Location::RegisterLocation(RAX));
1663 locations->AddTemp(Location::RegisterLocation(RDX));
1664 }
1665
1666 void InstructionCodeGeneratorX86_64::GenerateMethodEntryExitHook(HInstruction* instruction) {
1667 SlowPathCode* slow_path =
1668 new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathX86_64(instruction);
1669 LocationSummary* locations = instruction->GetLocations();
1670 codegen_->AddSlowPath(slow_path);
1671
1672 if (instruction->IsMethodExitHook()) {
1673 // Check whether the caller needs a deoptimization. Strictly speaking it would be sufficient
1674 // to check whether the CheckCallerForDeopt bit is set, but it is faster to check for any
1675 // non-zero value. The kCHA bit isn't used in debuggable runtimes, since CHA optimization is
1676 // disabled there, and the other bit is set when this method itself requires a deoptimization
1677 // due to redefinition. So it is safe to just check for a non-zero value here.
1678 __ cmpl(Address(CpuRegister(RSP), codegen_->GetStackOffsetOfShouldDeoptimizeFlag()),
1679 Immediate(0));
1680 __ j(kNotEqual, slow_path->GetEntryLabel());
1681 }
1682
1683 uint64_t address = reinterpret_cast64<uint64_t>(Runtime::Current()->GetInstrumentation());
1684 MemberOffset offset = instruction->IsMethodExitHook() ?
1685 instrumentation::Instrumentation::HaveMethodExitListenersOffset()
1686 : instrumentation::Instrumentation::HaveMethodEntryListenersOffset();
1687 __ movq(CpuRegister(TMP), Immediate(address + offset.Int32Value()));
1688 __ cmpb(Address(CpuRegister(TMP), 0),
1689 Immediate(instrumentation::Instrumentation::kFastTraceListeners));
1690 // Check if there are any method entry / exit listeners. If not, continue with execution.
1691 __ j(kLess, slow_path->GetExitLabel());
1692 // Check if there are any slow method entry / exit listeners. If yes, take the slow path.
1693 __ j(kGreater, slow_path->GetEntryLabel());
1694
1695 // Check if there is space in the buffer for a new entry; if not, take the slow path.
1696 CpuRegister index = locations->GetTemp(0).AsRegister<CpuRegister>();
1697 CpuRegister entry_addr = CpuRegister(TMP);
1698 uint64_t trace_buffer_index_offset =
1699 Thread::TraceBufferIndexOffset<kX86_64PointerSize>().SizeValue();
1700 __ gs()->movq(CpuRegister(index),
1701 Address::Absolute(trace_buffer_index_offset, /* no_rip= */ true));
1702 __ subq(CpuRegister(index), Immediate(kNumEntriesForWallClock));
1703 __ j(kLess, slow_path->GetEntryLabel());
1704
1705 // Update the index in the `Thread`.
1706 __ gs()->movq(Address::Absolute(trace_buffer_index_offset, /* no_rip= */ true),
1707 CpuRegister(index));
1708 // Calculate the entry address in the buffer.
1709 // entry_addr = base_addr + sizeof(void*) * index
1710 __ gs()->movq(entry_addr,
1711 Address::Absolute(Thread::TraceBufferPtrOffset<kX86_64PointerSize>().SizeValue(),
1712 /* no_rip= */ true));
1713 __ leaq(CpuRegister(entry_addr),
1714 Address(CpuRegister(entry_addr), CpuRegister(index), TIMES_8, 0));
1715
1716 // Record method pointer and action.
1717 CpuRegister method = index;
1718 __ movq(CpuRegister(method), Address(CpuRegister(RSP), kCurrentMethodStackOffset));
1719 // Use the last two bits of the method pointer to encode the trace action. For MethodEntry
1720 // the action is 0, so there is no need to set the bits; they are already 0.
1721 if (instruction->IsMethodExitHook()) {
1722 DCHECK_GE(ArtMethod::Alignment(kRuntimePointerSize), static_cast<size_t>(4));
1723 static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodEnter) == 0);
1724 static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodExit) == 1);
1725 __ orq(method, Immediate(enum_cast<int32_t>(TraceAction::kTraceMethodExit)));
1726 }
1727 __ movq(Address(entry_addr, kMethodOffsetInBytes), CpuRegister(method));
1728 // Get the timestamp. rdtsc returns the timestamp split across RDX:RAX (high:low 32 bits), even on 64-bit architectures.
1729 __ rdtsc();
1730 __ shlq(CpuRegister(RDX), Immediate(32));
1731 __ orq(CpuRegister(RAX), CpuRegister(RDX));
1732 __ movq(Address(entry_addr, kTimestampOffsetInBytes), CpuRegister(RAX));
1733 __ Bind(slow_path->GetExitLabel());
1734 }
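// Illustrative shape of one trace record written above: the per-thread buffer
// (reached through gs:) has its index decremented by kNumEntriesForWallClock
// slots per record; each record stores the ArtMethod* (with the trace action
// in its low bits) at kMethodOffsetInBytes and the rdtsc timestamp at
// kTimestampOffsetInBytes.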
1735
1736 void InstructionCodeGeneratorX86_64::VisitMethodEntryHook(HMethodEntryHook* instruction) {
1737 DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
1738 DCHECK(codegen_->RequiresCurrentMethod());
1739 GenerateMethodEntryExitHook(instruction);
1740 }
1741
1742 void SetInForReturnValue(HInstruction* instr, LocationSummary* locations) {
1743 switch (instr->InputAt(0)->GetType()) {
1744 case DataType::Type::kReference:
1745 case DataType::Type::kBool:
1746 case DataType::Type::kUint8:
1747 case DataType::Type::kInt8:
1748 case DataType::Type::kUint16:
1749 case DataType::Type::kInt16:
1750 case DataType::Type::kInt32:
1751 case DataType::Type::kInt64:
1752 locations->SetInAt(0, Location::RegisterLocation(RAX));
1753 break;
1754
1755 case DataType::Type::kFloat32:
1756 case DataType::Type::kFloat64:
1757 locations->SetInAt(0, Location::FpuRegisterLocation(XMM0));
1758 break;
1759
1760 case DataType::Type::kVoid:
1761 locations->SetInAt(0, Location::NoLocation());
1762 break;
1763
1764 default:
1765 LOG(FATAL) << "Unexpected return type " << instr->InputAt(0)->GetType();
1766 }
1767 }
1768
1769 void LocationsBuilderX86_64::VisitMethodExitHook(HMethodExitHook* method_hook) {
1770 LocationSummary* locations = new (GetGraph()->GetAllocator())
1771 LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
1772 SetInForReturnValue(method_hook, locations);
1773 // We use rdtsc to record the timestamp for method profiling. rdtsc returns
1774 // two 32-bit values in EAX + EDX even on 64-bit architectures.
1775 locations->AddTemp(Location::RegisterLocation(RAX));
1776 locations->AddTemp(Location::RegisterLocation(RDX));
1777 }
1778
1779 void InstructionCodeGeneratorX86_64::VisitMethodExitHook(HMethodExitHook* instruction) {
1780 DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
1781 DCHECK(codegen_->RequiresCurrentMethod());
1782 GenerateMethodEntryExitHook(instruction);
1783 }
1784
1785 void CodeGeneratorX86_64::MaybeIncrementHotness(HSuspendCheck* suspend_check, bool is_frame_entry) {
1786 if (GetCompilerOptions().CountHotnessInCompiledCode()) {
1787 NearLabel overflow;
1788 Register method = kMethodRegisterArgument;
1789 if (!is_frame_entry) {
1790 CHECK(RequiresCurrentMethod());
1791 method = TMP;
1792 __ movq(CpuRegister(method), Address(CpuRegister(RSP), kCurrentMethodStackOffset));
1793 }
1794 __ cmpw(Address(CpuRegister(method), ArtMethod::HotnessCountOffset().Int32Value()),
1795 Immediate(interpreter::kNterpHotnessValue));
1796 __ j(kEqual, &overflow);
1797 __ addw(Address(CpuRegister(method), ArtMethod::HotnessCountOffset().Int32Value()),
1798 Immediate(-1));
1799 __ Bind(&overflow);
1800 }
1801
1802 if (GetGraph()->IsCompilingBaseline() &&
1803 GetGraph()->IsUsefulOptimizing() &&
1804 !Runtime::Current()->IsAotCompiler()) {
1805 ProfilingInfo* info = GetGraph()->GetProfilingInfo();
1806 DCHECK(info != nullptr);
1807 CHECK(!HasEmptyFrame());
1808 uint64_t address = reinterpret_cast64<uint64_t>(info) +
1809 ProfilingInfo::BaselineHotnessCountOffset().Int32Value();
1810 SlowPathCode* slow_path =
1811 new (GetScopedAllocator()) CompileOptimizedSlowPathX86_64(suspend_check, address);
1812 AddSlowPath(slow_path);
1813 // Note: if the address were in the 32-bit range, we could use
1814 // Address::Absolute and avoid this movq.
1815 __ movq(CpuRegister(TMP), Immediate(address));
1816 // With multiple threads, this counter can overflow. This is OK: we will eventually
1817 // still see it reach 0. Also, at this point we have no register available to
1818 // inspect the counter directly.
1819 __ addw(Address(CpuRegister(TMP), 0), Immediate(-1));
1820 __ j(kEqual, slow_path->GetEntryLabel());
1821 __ Bind(slow_path->GetExitLabel());
1822 }
1823 }
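// Both counters above count down: the ArtMethod hotness counter is decremented
// until it reaches interpreter::kNterpHotnessValue (the cmpw/j(kEqual) skips
// further decrements), while the baseline ProfilingInfo counter jumps to
// CompileOptimizedSlowPathX86_64 once a decrement brings it to zero.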
1824
1825 void CodeGeneratorX86_64::GenerateFrameEntry() {
1826 __ cfi().SetCurrentCFAOffset(kX86_64WordSize); // return address
1827
1828 // Check if we need to generate the clinit check. We will jump to the
1829 // resolution stub if the class is not initialized and the executing thread is
1830 // not the thread initializing it.
1831 // We do this before constructing the frame to get the correct stack trace if
1832 // an exception is thrown.
1833 if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) {
1834 NearLabel resolution;
1835 // Check if we're visibly initialized.
1836
1837 // We don't emit a read barrier here to save on code size. We rely on the
1838 // resolution trampoline to do a suspend check before re-entering this code.
1839 __ movl(CpuRegister(TMP),
1840 Address(CpuRegister(kMethodRegisterArgument),
1841 ArtMethod::DeclaringClassOffset().Int32Value()));
1842 __ cmpb(Address(CpuRegister(TMP), kClassStatusByteOffset),
1843 Immediate(kShiftedVisiblyInitializedValue));
1844 __ j(kAboveEqual, &frame_entry_label_);
1845
1846 // Check if we're initializing and the initializing thread is the one
1847 // executing the code.
1848 __ cmpb(Address(CpuRegister(TMP), kClassStatusByteOffset),
1849 Immediate(kShiftedInitializingValue));
1850 __ j(kBelow, &resolution);
1851
1852 __ movl(CpuRegister(TMP),
1853 Address(CpuRegister(TMP), mirror::Class::ClinitThreadIdOffset().Int32Value()));
1854 __ gs()->cmpl(
1855 CpuRegister(TMP),
1856 Address::Absolute(Thread::TidOffset<kX86_64PointerSize>().Int32Value(), /*no_rip=*/ true));
1857 __ j(kEqual, &frame_entry_label_);
1858 __ Bind(&resolution);
1859
1860 // Jump to the resolution stub.
1861 ThreadOffset64 entrypoint_offset =
1862 GetThreadOffset<kX86_64PointerSize>(kQuickQuickResolutionTrampoline);
1863 __ gs()->jmp(Address::Absolute(entrypoint_offset, /*no_rip=*/ true));
1864 }
1865
1866 __ Bind(&frame_entry_label_);
1867 bool skip_overflow_check = IsLeafMethod()
1868 && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
1869 DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
1870
1871
1872 if (!skip_overflow_check) {
1873 size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86_64);
1874 __ testq(CpuRegister(RAX), Address(CpuRegister(RSP), -static_cast<int32_t>(reserved_bytes)));
1875 RecordPcInfo(nullptr, 0);
1876 }
1877
1878 if (!HasEmptyFrame()) {
1879 // Make sure the frame size isn't unreasonably large.
1880 DCHECK_LE(GetFrameSize(), GetMaximumFrameSize());
1881
1882 for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
1883 Register reg = kCoreCalleeSaves[i];
1884 if (allocated_registers_.ContainsCoreRegister(reg)) {
1885 __ pushq(CpuRegister(reg));
1886 __ cfi().AdjustCFAOffset(kX86_64WordSize);
1887 __ cfi().RelOffset(DWARFReg(reg), 0);
1888 }
1889 }
1890
1891 int adjust = GetFrameSize() - GetCoreSpillSize();
1892 IncreaseFrame(adjust);
1893 uint32_t xmm_spill_location = GetFpuSpillStart();
1894 size_t xmm_spill_slot_size = GetCalleePreservedFPWidth();
1895
1896 for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
1897 if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
1898 int offset = xmm_spill_location + (xmm_spill_slot_size * i);
1899 __ movsd(Address(CpuRegister(RSP), offset), XmmRegister(kFpuCalleeSaves[i]));
1900 __ cfi().RelOffset(DWARFReg(kFpuCalleeSaves[i]), offset);
1901 }
1902 }
1903
1904 // Save the current method if we need it. Note that we do not
1905 // do this in HCurrentMethod, as the instruction might have been removed
1906 // in the SSA graph.
1907 if (RequiresCurrentMethod()) {
1908 CHECK(!HasEmptyFrame());
1909 __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset),
1910 CpuRegister(kMethodRegisterArgument));
1911 }
1912
1913 if (GetGraph()->HasShouldDeoptimizeFlag()) {
1914 CHECK(!HasEmptyFrame());
1915 // Initialize should_deoptimize flag to 0.
1916 __ movl(Address(CpuRegister(RSP), GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
1917 }
1918 }
1919
1920 MaybeIncrementHotness(/* suspend_check= */ nullptr, /* is_frame_entry= */ true);
1921 }
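// Rough prologue result for a non-empty frame, from higher to lower addresses:
// the return address, the pushed core callee-saves, then the area allocated by
// IncreaseFrame(adjust) holding the XMM callee-save spills (at
// GetFpuSpillStart()) and, when RequiresCurrentMethod(), the ArtMethod* stored
// at [RSP + kCurrentMethodStackOffset].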
1922
1923 void CodeGeneratorX86_64::GenerateFrameExit() {
1924 __ cfi().RememberState();
1925 if (!HasEmptyFrame()) {
1926 uint32_t xmm_spill_location = GetFpuSpillStart();
1927 size_t xmm_spill_slot_size = GetCalleePreservedFPWidth();
1928 for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
1929 if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
1930 int offset = xmm_spill_location + (xmm_spill_slot_size * i);
1931 __ movsd(XmmRegister(kFpuCalleeSaves[i]), Address(CpuRegister(RSP), offset));
1932 __ cfi().Restore(DWARFReg(kFpuCalleeSaves[i]));
1933 }
1934 }
1935
1936 int adjust = GetFrameSize() - GetCoreSpillSize();
1937 DecreaseFrame(adjust);
1938
1939 for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
1940 Register reg = kCoreCalleeSaves[i];
1941 if (allocated_registers_.ContainsCoreRegister(reg)) {
1942 __ popq(CpuRegister(reg));
1943 __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize));
1944 __ cfi().Restore(DWARFReg(reg));
1945 }
1946 }
1947 }
1948 __ ret();
1949 __ cfi().RestoreState();
1950 __ cfi().DefCFAOffset(GetFrameSize());
1951 }
1952
1953 void CodeGeneratorX86_64::Bind(HBasicBlock* block) {
1954 __ Bind(GetLabelOf(block));
1955 }
1956
1957 void CodeGeneratorX86_64::Move(Location destination, Location source) {
1958 if (source.Equals(destination)) {
1959 return;
1960 }
1961 if (destination.IsRegister()) {
1962 CpuRegister dest = destination.AsRegister<CpuRegister>();
1963 if (source.IsRegister()) {
1964 __ movq(dest, source.AsRegister<CpuRegister>());
1965 } else if (source.IsFpuRegister()) {
1966 __ movd(dest, source.AsFpuRegister<XmmRegister>());
1967 } else if (source.IsStackSlot()) {
1968 __ movl(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1969 } else if (source.IsConstant()) {
1970 HConstant* constant = source.GetConstant();
1971 if (constant->IsLongConstant()) {
1972 Load64BitValue(dest, constant->AsLongConstant()->GetValue());
1973 } else if (constant->IsDoubleConstant()) {
1974 Load64BitValue(dest, GetInt64ValueOf(constant));
1975 } else {
1976 Load32BitValue(dest, GetInt32ValueOf(constant));
1977 }
1978 } else {
1979 DCHECK(source.IsDoubleStackSlot());
1980 __ movq(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1981 }
1982 } else if (destination.IsFpuRegister()) {
1983 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
1984 if (source.IsRegister()) {
1985 __ movd(dest, source.AsRegister<CpuRegister>());
1986 } else if (source.IsFpuRegister()) {
1987 __ movaps(dest, source.AsFpuRegister<XmmRegister>());
1988 } else if (source.IsConstant()) {
1989 HConstant* constant = source.GetConstant();
1990 int64_t value = CodeGenerator::GetInt64ValueOf(constant);
1991 if (constant->IsFloatConstant()) {
1992 Load32BitValue(dest, static_cast<int32_t>(value));
1993 } else {
1994 Load64BitValue(dest, value);
1995 }
1996 } else if (source.IsStackSlot()) {
1997 __ movss(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1998 } else {
1999 DCHECK(source.IsDoubleStackSlot());
2000 __ movsd(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
2001 }
2002 } else if (destination.IsStackSlot()) {
2003 if (source.IsRegister()) {
2004 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
2005 source.AsRegister<CpuRegister>());
2006 } else if (source.IsFpuRegister()) {
2007 __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
2008 source.AsFpuRegister<XmmRegister>());
2009 } else if (source.IsConstant()) {
2010 HConstant* constant = source.GetConstant();
2011 int32_t value = GetInt32ValueOf(constant);
2012 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
2013 } else {
2014 DCHECK(source.IsStackSlot()) << source;
2015 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
2016 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
2017 }
2018 } else {
2019 DCHECK(destination.IsDoubleStackSlot());
2020 if (source.IsRegister()) {
2021 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
2022 source.AsRegister<CpuRegister>());
2023 } else if (source.IsFpuRegister()) {
2024 __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
2025 source.AsFpuRegister<XmmRegister>());
2026 } else if (source.IsConstant()) {
2027 HConstant* constant = source.GetConstant();
2028 DCHECK(constant->IsLongConstant() || constant->IsDoubleConstant());
2029 int64_t value = GetInt64ValueOf(constant);
2030 Store64BitValueToStack(destination, value);
2031 } else {
2032 DCHECK(source.IsDoubleStackSlot());
2033 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
2034 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
2035 }
2036 }
2037 }
2038
2039 void CodeGeneratorX86_64::LoadFromMemoryNoReference(DataType::Type type,
2040 Location dst,
2041 Address src) {
2042 switch (type) {
2043 case DataType::Type::kBool:
2044 case DataType::Type::kUint8:
2045 __ movzxb(dst.AsRegister<CpuRegister>(), src);
2046 break;
2047 case DataType::Type::kInt8:
2048 __ movsxb(dst.AsRegister<CpuRegister>(), src);
2049 break;
2050 case DataType::Type::kUint16:
2051 __ movzxw(dst.AsRegister<CpuRegister>(), src);
2052 break;
2053 case DataType::Type::kInt16:
2054 __ movsxw(dst.AsRegister<CpuRegister>(), src);
2055 break;
2056 case DataType::Type::kInt32:
2057 case DataType::Type::kUint32:
2058 __ movl(dst.AsRegister<CpuRegister>(), src);
2059 break;
2060 case DataType::Type::kInt64:
2061 case DataType::Type::kUint64:
2062 __ movq(dst.AsRegister<CpuRegister>(), src);
2063 break;
2064 case DataType::Type::kFloat32:
2065 __ movss(dst.AsFpuRegister<XmmRegister>(), src);
2066 break;
2067 case DataType::Type::kFloat64:
2068 __ movsd(dst.AsFpuRegister<XmmRegister>(), src);
2069 break;
2070 case DataType::Type::kVoid:
2071 case DataType::Type::kReference:
2072 LOG(FATAL) << "Unreachable type " << type;
2073 UNREACHABLE();
2074 }
2075 }
2076
2077 void CodeGeneratorX86_64::MoveConstant(Location location, int32_t value) {
2078 DCHECK(location.IsRegister());
2079 Load64BitValue(location.AsRegister<CpuRegister>(), static_cast<int64_t>(value));
2080 }
2081
2082 void CodeGeneratorX86_64::MoveLocation(Location dst,
2083 Location src,
2084 [[maybe_unused]] DataType::Type dst_type) {
2085 Move(dst, src);
2086 }
2087
2088 void CodeGeneratorX86_64::AddLocationAsTemp(Location location, LocationSummary* locations) {
2089 if (location.IsRegister()) {
2090 locations->AddTemp(location);
2091 } else {
2092 UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
2093 }
2094 }
2095
2096 void InstructionCodeGeneratorX86_64::HandleGoto(HInstruction* got, HBasicBlock* successor) {
2097 if (successor->IsExitBlock()) {
2098 DCHECK(got->GetPrevious()->AlwaysThrows());
2099 return; // no code needed
2100 }
2101
2102 HBasicBlock* block = got->GetBlock();
2103 HInstruction* previous = got->GetPrevious();
2104
2105 HLoopInformation* info = block->GetLoopInformation();
2106 if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
2107 codegen_->MaybeIncrementHotness(info->GetSuspendCheck(), /* is_frame_entry= */ false);
2108 GenerateSuspendCheck(info->GetSuspendCheck(), successor);
2109 return;
2110 }
2111
2112 if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
2113 GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
2114 }
2115 if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
2116 __ jmp(codegen_->GetLabelOf(successor));
2117 }
2118 }
2119
2120 void LocationsBuilderX86_64::VisitGoto(HGoto* got) {
2121 got->SetLocations(nullptr);
2122 }
2123
2124 void InstructionCodeGeneratorX86_64::VisitGoto(HGoto* got) {
2125 HandleGoto(got, got->GetSuccessor());
2126 }
2127
2128 void LocationsBuilderX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
2129 try_boundary->SetLocations(nullptr);
2130 }
2131
2132 void InstructionCodeGeneratorX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
2133 HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
2134 if (!successor->IsExitBlock()) {
2135 HandleGoto(try_boundary, successor);
2136 }
2137 }
2138
2139 void LocationsBuilderX86_64::VisitExit(HExit* exit) {
2140 exit->SetLocations(nullptr);
2141 }
2142
2143 void InstructionCodeGeneratorX86_64::VisitExit([[maybe_unused]] HExit* exit) {}
2144
2145 template<class LabelType>
2146 void InstructionCodeGeneratorX86_64::GenerateFPJumps(HCondition* cond,
2147 LabelType* true_label,
2148 LabelType* false_label) {
2149 if (cond->IsFPConditionTrueIfNaN()) {
2150 __ j(kUnordered, true_label);
2151 } else if (cond->IsFPConditionFalseIfNaN()) {
2152 __ j(kUnordered, false_label);
2153 }
2154 __ j(X86_64FPCondition(cond->GetCondition()), true_label);
2155 }
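// Example: for a `<` comparison, ucomiss/ucomisd reports NaN as "unordered";
// `<` is false-if-NaN under Java semantics, so the kUnordered jump above goes
// to the false label before the ordinary condition jump is emitted.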
2156
2157 void InstructionCodeGeneratorX86_64::GenerateCompareTest(HCondition* condition) {
2158 LocationSummary* locations = condition->GetLocations();
2159
2160 Location left = locations->InAt(0);
2161 Location right = locations->InAt(1);
2162 DataType::Type type = condition->InputAt(0)->GetType();
2163 switch (type) {
2164 case DataType::Type::kBool:
2165 case DataType::Type::kUint8:
2166 case DataType::Type::kInt8:
2167 case DataType::Type::kUint16:
2168 case DataType::Type::kInt16:
2169 case DataType::Type::kInt32:
2170 case DataType::Type::kReference: {
2171 codegen_->GenerateIntCompare(left, right);
2172 break;
2173 }
2174 case DataType::Type::kInt64: {
2175 codegen_->GenerateLongCompare(left, right);
2176 break;
2177 }
2178 case DataType::Type::kFloat32: {
2179 if (right.IsFpuRegister()) {
2180 __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
2181 } else if (right.IsConstant()) {
2182 __ ucomiss(left.AsFpuRegister<XmmRegister>(),
2183 codegen_->LiteralFloatAddress(
2184 right.GetConstant()->AsFloatConstant()->GetValue()));
2185 } else {
2186 DCHECK(right.IsStackSlot());
2187 __ ucomiss(left.AsFpuRegister<XmmRegister>(),
2188 Address(CpuRegister(RSP), right.GetStackIndex()));
2189 }
2190 break;
2191 }
2192 case DataType::Type::kFloat64: {
2193 if (right.IsFpuRegister()) {
2194 __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
2195 } else if (right.IsConstant()) {
2196 __ ucomisd(left.AsFpuRegister<XmmRegister>(),
2197 codegen_->LiteralDoubleAddress(
2198 right.GetConstant()->AsDoubleConstant()->GetValue()));
2199 } else {
2200 DCHECK(right.IsDoubleStackSlot());
2201 __ ucomisd(left.AsFpuRegister<XmmRegister>(),
2202 Address(CpuRegister(RSP), right.GetStackIndex()));
2203 }
2204 break;
2205 }
2206 default:
2207 LOG(FATAL) << "Unexpected condition type " << type;
2208 }
2209 }
2210
2211 template<class LabelType>
2212 void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* condition,
2213 LabelType* true_target_in,
2214 LabelType* false_target_in) {
2215 // Generated branching requires both targets to be explicit. If either of the
2216 // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead.
2217 LabelType fallthrough_target;
2218 LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
2219 LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
2220
2221 // Generate the comparison to set the CC.
2222 GenerateCompareTest(condition);
2223
2224 // Now generate the correct jump(s).
2225 DataType::Type type = condition->InputAt(0)->GetType();
2226 switch (type) {
2227 case DataType::Type::kInt64: {
2228 __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
2229 break;
2230 }
2231 case DataType::Type::kFloat32: {
2232 GenerateFPJumps(condition, true_target, false_target);
2233 break;
2234 }
2235 case DataType::Type::kFloat64: {
2236 GenerateFPJumps(condition, true_target, false_target);
2237 break;
2238 }
2239 default:
2240 LOG(FATAL) << "Unexpected condition type " << type;
2241 }
2242
2243 if (false_target != &fallthrough_target) {
2244 __ jmp(false_target);
2245 }
2246
2247 if (fallthrough_target.IsLinked()) {
2248 __ Bind(&fallthrough_target);
2249 }
2250 }
2251
2252 static bool AreEflagsSetFrom(HInstruction* cond,
2253 HInstruction* branch,
2254 const CompilerOptions& compiler_options) {
2255 // Moves may affect the eflags register (move zero uses xorl), so the EFLAGS
2256 // are set only strictly before `branch`. We can't use the eflags on long
2257 // conditions if they are materialized due to the complex branching.
2258 return cond->IsCondition() &&
2259 cond->GetNext() == branch &&
2260 !DataType::IsFloatingPointType(cond->InputAt(0)->GetType()) &&
2261 !(cond->GetBlock()->GetGraph()->IsCompilingBaseline() &&
2262 compiler_options.ProfileBranches());
2263 }
2264
2265 template<class LabelType>
2266 void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruction,
2267 size_t condition_input_index,
2268 LabelType* true_target,
2269 LabelType* false_target) {
2270 HInstruction* cond = instruction->InputAt(condition_input_index);
2271
2272 if (true_target == nullptr && false_target == nullptr) {
2273 // Nothing to do. The code always falls through.
2274 return;
2275 } else if (cond->IsIntConstant()) {
2276 // Constant condition, statically compared against "true" (integer value 1).
2277 if (cond->AsIntConstant()->IsTrue()) {
2278 if (true_target != nullptr) {
2279 __ jmp(true_target);
2280 }
2281 } else {
2282 DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
2283 if (false_target != nullptr) {
2284 __ jmp(false_target);
2285 }
2286 }
2287 return;
2288 }
2289
2290 // The following code generates these patterns:
2291 // (1) true_target == nullptr && false_target != nullptr
2292 // - opposite condition true => branch to false_target
2293 // (2) true_target != nullptr && false_target == nullptr
2294 // - condition true => branch to true_target
2295 // (3) true_target != nullptr && false_target != nullptr
2296 // - condition true => branch to true_target
2297 // - branch to false_target
2298 if (IsBooleanValueOrMaterializedCondition(cond)) {
2299 if (AreEflagsSetFrom(cond, instruction, codegen_->GetCompilerOptions())) {
2300 if (true_target == nullptr) {
2301 __ j(X86_64IntegerCondition(cond->AsCondition()->GetOppositeCondition()), false_target);
2302 } else {
2303 __ j(X86_64IntegerCondition(cond->AsCondition()->GetCondition()), true_target);
2304 }
2305 } else {
2306 // Materialized condition, compare against 0.
2307 Location lhs = instruction->GetLocations()->InAt(condition_input_index);
2308 if (lhs.IsRegister()) {
2309 __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
2310 } else {
2311 __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0));
2312 }
2313 if (true_target == nullptr) {
2314 __ j(kEqual, false_target);
2315 } else {
2316 __ j(kNotEqual, true_target);
2317 }
2318 }
2319 } else {
2320 // Condition has not been materialized, use its inputs as the
2321 // comparison and its condition as the branch condition.
2322 HCondition* condition = cond->AsCondition();
2323
2324 // If this is a long or FP comparison that has been folded into
2325 // the HCondition, generate the comparison directly.
2326 DataType::Type type = condition->InputAt(0)->GetType();
2327 if (type == DataType::Type::kInt64 || DataType::IsFloatingPointType(type)) {
2328 GenerateCompareTestAndBranch(condition, true_target, false_target);
2329 return;
2330 }
2331
2332 Location lhs = condition->GetLocations()->InAt(0);
2333 Location rhs = condition->GetLocations()->InAt(1);
2334 codegen_->GenerateIntCompare(lhs, rhs);
2335 if (true_target == nullptr) {
2336 __ j(X86_64IntegerCondition(condition->GetOppositeCondition()), false_target);
2337 } else {
2338 __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
2339 }
2340 }
2341
2342 // If neither branch falls through (case 3), the conditional branch to `true_target`
2343 // was already emitted (case 2) and we need to emit a jump to `false_target`.
2344 if (true_target != nullptr && false_target != nullptr) {
2345 __ jmp(false_target);
2346 }
2347 }
2348
2349 void LocationsBuilderX86_64::VisitIf(HIf* if_instr) {
2350 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
2351 if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
2352 if (GetGraph()->IsCompilingBaseline() &&
2353 codegen_->GetCompilerOptions().ProfileBranches() &&
2354 !Runtime::Current()->IsAotCompiler()) {
2355 locations->SetInAt(0, Location::RequiresRegister());
2356 locations->AddTemp(Location::RequiresRegister());
2357 } else {
2358 locations->SetInAt(0, Location::Any());
2359 }
2360 }
2361 }
2362
2363 void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
2364 HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
2365 HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
2366 Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
2367 nullptr : codegen_->GetLabelOf(true_successor);
2368 Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
2369 nullptr : codegen_->GetLabelOf(false_successor);
2370 if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
2371 if (GetGraph()->IsCompilingBaseline() &&
2372 codegen_->GetCompilerOptions().ProfileBranches() &&
2373 !Runtime::Current()->IsAotCompiler()) {
2374 DCHECK(if_instr->InputAt(0)->IsCondition());
2375 CpuRegister temp = if_instr->GetLocations()->GetTemp(0).AsRegister<CpuRegister>();
2376 ProfilingInfo* info = GetGraph()->GetProfilingInfo();
2377 DCHECK(info != nullptr);
2378 BranchCache* cache = info->GetBranchCache(if_instr->GetDexPc());
2379 // Currently, not all If branches are profiled.
2380 if (cache != nullptr) {
2381 uint64_t address =
2382 reinterpret_cast64<uint64_t>(cache) + BranchCache::FalseOffset().Int32Value();
2383 static_assert(
2384 BranchCache::TrueOffset().Int32Value() - BranchCache::FalseOffset().Int32Value() == 2,
2385 "Unexpected offsets for BranchCache");
2386 NearLabel done;
2387 Location lhs = if_instr->GetLocations()->InAt(0);
2388 __ movq(CpuRegister(TMP), Immediate(address));
2389 __ movzxw(temp, Address(CpuRegister(TMP), lhs.AsRegister<CpuRegister>(), TIMES_2, 0));
2390 __ addw(temp, Immediate(1));
2391 __ j(kZero, &done);
2392 __ movw(Address(CpuRegister(TMP), lhs.AsRegister<CpuRegister>(), TIMES_2, 0), temp);
2393 __ Bind(&done);
2394 }
2395 }
2396 }
2397 GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
2398 }
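// BranchCache profiling above, illustrated: the cache holds two uint16
// counters, "false" at offset 0 and "true" at offset 2, so scaling the
// condition value by TIMES_2 selects the right one. The addw/j(kZero) pair
// makes the 16-bit counter saturate instead of wrapping back to zero.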
2399
2400 void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
2401 LocationSummary* locations = new (GetGraph()->GetAllocator())
2402 LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
2403 InvokeRuntimeCallingConvention calling_convention;
2404 RegisterSet caller_saves = RegisterSet::Empty();
2405 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
2406 locations->SetCustomSlowPathCallerSaves(caller_saves);
2407 if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
2408 locations->SetInAt(0, Location::Any());
2409 }
2410 }
2411
2412 void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
2413 SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86_64>(deoptimize);
2414 GenerateTestAndBranch<Label>(deoptimize,
2415 /* condition_input_index= */ 0,
2416 slow_path->GetEntryLabel(),
2417 /* false_target= */ nullptr);
2418 }
2419
2420 void LocationsBuilderX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
2421 LocationSummary* locations = new (GetGraph()->GetAllocator())
2422 LocationSummary(flag, LocationSummary::kNoCall);
2423 locations->SetOut(Location::RequiresRegister());
2424 }
2425
2426 void InstructionCodeGeneratorX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
2427 __ movl(flag->GetLocations()->Out().AsRegister<CpuRegister>(),
2428 Address(CpuRegister(RSP), codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
2429 }
2430
2431 static bool SelectCanUseCMOV(HSelect* select) {
2432 // There are no conditional move instructions for XMMs.
2433 if (DataType::IsFloatingPointType(select->GetType())) {
2434 return false;
2435 }
2436
2437 // A FP condition doesn't generate the single CC that we need.
2438 HInstruction* condition = select->GetCondition();
2439 if (condition->IsCondition() &&
2440 DataType::IsFloatingPointType(condition->InputAt(0)->GetType())) {
2441 return false;
2442 }
2443
2444 // We can generate a CMOV for this Select.
2445 return true;
2446 }
2447
2448 void LocationsBuilderX86_64::VisitSelect(HSelect* select) {
2449 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select);
2450 if (DataType::IsFloatingPointType(select->GetType())) {
2451 locations->SetInAt(0, Location::RequiresFpuRegister());
2452 locations->SetInAt(1, Location::Any());
2453 } else {
2454 locations->SetInAt(0, Location::RequiresRegister());
2455 if (SelectCanUseCMOV(select)) {
2456 if (select->InputAt(1)->IsConstant()) {
2457 locations->SetInAt(1, Location::RequiresRegister());
2458 } else {
2459 locations->SetInAt(1, Location::Any());
2460 }
2461 } else {
2462 locations->SetInAt(1, Location::Any());
2463 }
2464 }
2465 if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
2466 locations->SetInAt(2, Location::RequiresRegister());
2467 }
2468 locations->SetOut(Location::SameAsFirstInput());
2469 }
2470
2471 void InstructionCodeGeneratorX86_64::VisitSelect(HSelect* select) {
2472 LocationSummary* locations = select->GetLocations();
2473 if (SelectCanUseCMOV(select)) {
2474 // If both the condition and the source types are integer, we can generate
2475 // a CMOV to implement Select.
2476 CpuRegister value_false = locations->InAt(0).AsRegister<CpuRegister>();
2477 Location value_true_loc = locations->InAt(1);
2478 DCHECK(locations->InAt(0).Equals(locations->Out()));
2479
2480 HInstruction* select_condition = select->GetCondition();
2481 Condition cond = kNotEqual;
2482
2483 // Figure out how to test the 'condition'.
2484 if (select_condition->IsCondition()) {
2485 HCondition* condition = select_condition->AsCondition();
2486 if (!condition->IsEmittedAtUseSite()) {
2487 // This was a previously materialized condition.
2488 // Can we use the existing condition code?
2489 if (AreEflagsSetFrom(condition, select, codegen_->GetCompilerOptions())) {
2490 // Materialization was the previous instruction. Condition codes are right.
2491 cond = X86_64IntegerCondition(condition->GetCondition());
2492 } else {
2493 // No, we have to recreate the condition code.
2494 CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
2495 __ testl(cond_reg, cond_reg);
2496 }
2497 } else {
2498 GenerateCompareTest(condition);
2499 cond = X86_64IntegerCondition(condition->GetCondition());
2500 }
2501 } else {
2502 // Must be a Boolean condition, which needs to be compared to 0.
2503 CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
2504 __ testl(cond_reg, cond_reg);
2505 }
2506
2507 // If the condition is true, overwrite the output, which already contains false.
2508 // Generate the correct sized CMOV.
2509 bool is_64_bit = DataType::Is64BitType(select->GetType());
2510 if (value_true_loc.IsRegister()) {
2511 __ cmov(cond, value_false, value_true_loc.AsRegister<CpuRegister>(), is_64_bit);
2512 } else {
2513 __ cmov(cond,
2514 value_false,
2515 Address(CpuRegister(RSP), value_true_loc.GetStackIndex()), is_64_bit);
2516 }
2517 } else {
2518 NearLabel false_target;
2519 GenerateTestAndBranch<NearLabel>(select,
2520 /* condition_input_index= */ 2,
2521 /* true_target= */ nullptr,
2522 &false_target);
2523 codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
2524 __ Bind(&false_target);
2525 }
2526 }
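// CMOV path recap: the output register aliases input 0 (the "false" value,
// see Location::SameAsFirstInput() in the builder), so only the "true" value
// needs a conditional move; when CMOV cannot be used, the code instead
// branches around a single Move of the "true" value.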
2527
2528 void LocationsBuilderX86_64::VisitNop(HNop* nop) {
2529 new (GetGraph()->GetAllocator()) LocationSummary(nop);
2530 }
2531
2532 void InstructionCodeGeneratorX86_64::VisitNop(HNop*) {
2533 // The environment recording already happened in CodeGenerator::Compile.
2534 }
2535
2536 void CodeGeneratorX86_64::IncreaseFrame(size_t adjustment) {
2537 __ subq(CpuRegister(RSP), Immediate(adjustment));
2538 __ cfi().AdjustCFAOffset(adjustment);
2539 }
2540
2541 void CodeGeneratorX86_64::DecreaseFrame(size_t adjustment) {
2542 __ addq(CpuRegister(RSP), Immediate(adjustment));
2543 __ cfi().AdjustCFAOffset(-adjustment);
2544 }
2545
2546 void CodeGeneratorX86_64::GenerateNop() {
2547 __ nop();
2548 }
2549
2550 void LocationsBuilderX86_64::HandleCondition(HCondition* cond) {
2551 LocationSummary* locations =
2552 new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall);
2553 // Handle the long/FP comparisons made in instruction simplification.
2554 switch (cond->InputAt(0)->GetType()) {
2555 case DataType::Type::kInt64:
2556 locations->SetInAt(0, Location::RequiresRegister());
2557 locations->SetInAt(1, Location::Any());
2558 break;
2559 case DataType::Type::kFloat32:
2560 case DataType::Type::kFloat64:
2561 locations->SetInAt(0, Location::RequiresFpuRegister());
2562 locations->SetInAt(1, Location::Any());
2563 break;
2564 default:
2565 locations->SetInAt(0, Location::RequiresRegister());
2566 locations->SetInAt(1, Location::Any());
2567 break;
2568 }
2569 if (!cond->IsEmittedAtUseSite()) {
2570 locations->SetOut(Location::RequiresRegister());
2571 }
2572 }
2573
2574 void InstructionCodeGeneratorX86_64::HandleCondition(HCondition* cond) {
2575 if (cond->IsEmittedAtUseSite()) {
2576 return;
2577 }
2578
2579 LocationSummary* locations = cond->GetLocations();
2580 Location lhs = locations->InAt(0);
2581 Location rhs = locations->InAt(1);
2582 CpuRegister reg = locations->Out().AsRegister<CpuRegister>();
2583 NearLabel true_label, false_label;
2584
2585 switch (cond->InputAt(0)->GetType()) {
2586 default:
2587 // Integer case.
2588
2589 // Clear output register: setcc only sets the low byte.
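      // For example, for `x < y` the sequence is roughly:
      //   xorl out, out    // clear the full register; it also clobbers EFLAGS, so it
      //                    // must come before the compare below
      //   cmpl x, y
      //   setl out_low     // writes only the low byte of out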
2590 __ xorl(reg, reg);
2591
2592 codegen_->GenerateIntCompare(lhs, rhs);
2593 __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
2594 return;
2595 case DataType::Type::kInt64:
2596 // Clear output register: setcc only sets the low byte.
2597 __ xorl(reg, reg);
2598
2599 codegen_->GenerateLongCompare(lhs, rhs);
2600 __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
2601 return;
2602 case DataType::Type::kFloat32: {
2603 XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
2604 if (rhs.IsConstant()) {
2605 float value = rhs.GetConstant()->AsFloatConstant()->GetValue();
2606 __ ucomiss(lhs_reg, codegen_->LiteralFloatAddress(value));
2607 } else if (rhs.IsStackSlot()) {
2608 __ ucomiss(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
2609 } else {
2610 __ ucomiss(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
2611 }
2612 GenerateFPJumps(cond, &true_label, &false_label);
2613 break;
2614 }
2615 case DataType::Type::kFloat64: {
2616 XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
2617 if (rhs.IsConstant()) {
2618 double value = rhs.GetConstant()->AsDoubleConstant()->GetValue();
2619 __ ucomisd(lhs_reg, codegen_->LiteralDoubleAddress(value));
2620 } else if (rhs.IsDoubleStackSlot()) {
2621 __ ucomisd(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
2622 } else {
2623 __ ucomisd(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
2624 }
2625 GenerateFPJumps(cond, &true_label, &false_label);
2626 break;
2627 }
2628 }
2629
2630 // Convert the jumps into the result.
2631 NearLabel done_label;
2632
2633 // False case: result = 0.
2634 __ Bind(&false_label);
2635 __ xorl(reg, reg);
2636 __ jmp(&done_label);
2637
2638 // True case: result = 1.
2639 __ Bind(&true_label);
2640 __ movl(reg, Immediate(1));
2641 __ Bind(&done_label);
2642 }
2643
2644 void LocationsBuilderX86_64::VisitEqual(HEqual* comp) {
2645 HandleCondition(comp);
2646 }
2647
2648 void InstructionCodeGeneratorX86_64::VisitEqual(HEqual* comp) {
2649 HandleCondition(comp);
2650 }
2651
2652 void LocationsBuilderX86_64::VisitNotEqual(HNotEqual* comp) {
2653 HandleCondition(comp);
2654 }
2655
2656 void InstructionCodeGeneratorX86_64::VisitNotEqual(HNotEqual* comp) {
2657 HandleCondition(comp);
2658 }
2659
2660 void LocationsBuilderX86_64::VisitLessThan(HLessThan* comp) {
2661 HandleCondition(comp);
2662 }
2663
2664 void InstructionCodeGeneratorX86_64::VisitLessThan(HLessThan* comp) {
2665 HandleCondition(comp);
2666 }
2667
2668 void LocationsBuilderX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2669 HandleCondition(comp);
2670 }
2671
2672 void InstructionCodeGeneratorX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2673 HandleCondition(comp);
2674 }
2675
2676 void LocationsBuilderX86_64::VisitGreaterThan(HGreaterThan* comp) {
2677 HandleCondition(comp);
2678 }
2679
2680 void InstructionCodeGeneratorX86_64::VisitGreaterThan(HGreaterThan* comp) {
2681 HandleCondition(comp);
2682 }
2683
2684 void LocationsBuilderX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2685 HandleCondition(comp);
2686 }
2687
2688 void InstructionCodeGeneratorX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2689 HandleCondition(comp);
2690 }
2691
2692 void LocationsBuilderX86_64::VisitBelow(HBelow* comp) {
2693 HandleCondition(comp);
2694 }
2695
2696 void InstructionCodeGeneratorX86_64::VisitBelow(HBelow* comp) {
2697 HandleCondition(comp);
2698 }
2699
2700 void LocationsBuilderX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
2701 HandleCondition(comp);
2702 }
2703
2704 void InstructionCodeGeneratorX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
2705 HandleCondition(comp);
2706 }
2707
2708 void LocationsBuilderX86_64::VisitAbove(HAbove* comp) {
2709 HandleCondition(comp);
2710 }
2711
2712 void InstructionCodeGeneratorX86_64::VisitAbove(HAbove* comp) {
2713 HandleCondition(comp);
2714 }
2715
2716 void LocationsBuilderX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
2717 HandleCondition(comp);
2718 }
2719
2720 void InstructionCodeGeneratorX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
2721 HandleCondition(comp);
2722 }
2723
2724 void LocationsBuilderX86_64::VisitCompare(HCompare* compare) {
2725 LocationSummary* locations =
2726 new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
2727 switch (compare->InputAt(0)->GetType()) {
2728 case DataType::Type::kBool:
2729 case DataType::Type::kUint8:
2730 case DataType::Type::kInt8:
2731 case DataType::Type::kUint16:
2732 case DataType::Type::kInt16:
2733 case DataType::Type::kInt32:
2734 case DataType::Type::kInt64: {
2735 locations->SetInAt(0, Location::RequiresRegister());
2736 locations->SetInAt(1, Location::Any());
2737 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2738 break;
2739 }
2740 case DataType::Type::kFloat32:
2741 case DataType::Type::kFloat64: {
2742 locations->SetInAt(0, Location::RequiresFpuRegister());
2743 locations->SetInAt(1, Location::Any());
2744 locations->SetOut(Location::RequiresRegister());
2745 break;
2746 }
2747 default:
2748 LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
2749 }
2750 }
2751
2752 void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) {
2753 LocationSummary* locations = compare->GetLocations();
2754 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
2755 Location left = locations->InAt(0);
2756 Location right = locations->InAt(1);
2757
2758 NearLabel less, greater, done;
2759 DataType::Type type = compare->InputAt(0)->GetType();
2760 Condition less_cond = kLess;
2761
2762 switch (type) {
2763 case DataType::Type::kBool:
2764 case DataType::Type::kUint8:
2765 case DataType::Type::kInt8:
2766 case DataType::Type::kUint16:
2767 case DataType::Type::kInt16:
2768 case DataType::Type::kInt32: {
2769 codegen_->GenerateIntCompare(left, right);
2770 break;
2771 }
2772 case DataType::Type::kInt64: {
2773 codegen_->GenerateLongCompare(left, right);
2774 break;
2775 }
2776 case DataType::Type::kFloat32: {
2777 XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
2778 if (right.IsConstant()) {
2779 float value = right.GetConstant()->AsFloatConstant()->GetValue();
2780 __ ucomiss(left_reg, codegen_->LiteralFloatAddress(value));
2781 } else if (right.IsStackSlot()) {
2782 __ ucomiss(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
2783 } else {
2784 __ ucomiss(left_reg, right.AsFpuRegister<XmmRegister>());
2785 }
2786 __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
2787 less_cond = kBelow; // ucomis{s,d} sets CF
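      // ucomiss/ucomisd set only ZF/PF/CF: less => CF=1, equal => ZF=1, greater => all
      // clear, unordered (NaN) => ZF=PF=CF=1. Hence the explicit kUnordered branch above
      // and the use of kBelow (CF=1) rather than kLess, which would read the signed
      // flags SF/OF that these instructions leave cleared.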
2788 break;
2789 }
2790 case DataType::Type::kFloat64: {
2791 XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
2792 if (right.IsConstant()) {
2793 double value = right.GetConstant()->AsDoubleConstant()->GetValue();
2794 __ ucomisd(left_reg, codegen_->LiteralDoubleAddress(value));
2795 } else if (right.IsDoubleStackSlot()) {
2796 __ ucomisd(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
2797 } else {
2798 __ ucomisd(left_reg, right.AsFpuRegister<XmmRegister>());
2799 }
2800 __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
2801 less_cond = kBelow; // ucomis{s,d} sets CF
2802 break;
2803 }
2804 default:
2805 LOG(FATAL) << "Unexpected compare type " << type;
2806 }
2807
2808 __ movl(out, Immediate(0));
2809 __ j(kEqual, &done);
2810 __ j(less_cond, &less);
2811
2812 __ Bind(&greater);
2813 __ movl(out, Immediate(1));
2814 __ jmp(&done);
2815
2816 __ Bind(&less);
2817 __ movl(out, Immediate(-1));
2818
2819 __ Bind(&done);
2820 }
2821
2822 void LocationsBuilderX86_64::VisitIntConstant(HIntConstant* constant) {
2823 LocationSummary* locations =
2824 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2825 locations->SetOut(Location::ConstantLocation(constant));
2826 }
2827
2828 void InstructionCodeGeneratorX86_64::VisitIntConstant([[maybe_unused]] HIntConstant* constant) {
2829 // Will be generated at use site.
2830 }
2831
2832 void LocationsBuilderX86_64::VisitNullConstant(HNullConstant* constant) {
2833 LocationSummary* locations =
2834 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2835 locations->SetOut(Location::ConstantLocation(constant));
2836 }
2837
2838 void InstructionCodeGeneratorX86_64::VisitNullConstant([[maybe_unused]] HNullConstant* constant) {
2839 // Will be generated at use site.
2840 }
2841
2842 void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) {
2843 LocationSummary* locations =
2844 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2845 locations->SetOut(Location::ConstantLocation(constant));
2846 }
2847
2848 void InstructionCodeGeneratorX86_64::VisitLongConstant([[maybe_unused]] HLongConstant* constant) {
2849 // Will be generated at use site.
2850 }
2851
2852 void LocationsBuilderX86_64::VisitFloatConstant(HFloatConstant* constant) {
2853 LocationSummary* locations =
2854 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2855 locations->SetOut(Location::ConstantLocation(constant));
2856 }
2857
2858 void InstructionCodeGeneratorX86_64::VisitFloatConstant([[maybe_unused]] HFloatConstant* constant) {
2859 // Will be generated at use site.
2860 }
2861
2862 void LocationsBuilderX86_64::VisitDoubleConstant(HDoubleConstant* constant) {
2863 LocationSummary* locations =
2864 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2865 locations->SetOut(Location::ConstantLocation(constant));
2866 }
2867
2868 void InstructionCodeGeneratorX86_64::VisitDoubleConstant(
2869 [[maybe_unused]] HDoubleConstant* constant) {
2870 // Will be generated at use site.
2871 }
2872
2873 void LocationsBuilderX86_64::VisitConstructorFence(HConstructorFence* constructor_fence) {
2874 constructor_fence->SetLocations(nullptr);
2875 }
2876
2877 void InstructionCodeGeneratorX86_64::VisitConstructorFence(
2878 [[maybe_unused]] HConstructorFence* constructor_fence) {
2879 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2880 }
2881
2882 void LocationsBuilderX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2883 memory_barrier->SetLocations(nullptr);
2884 }
2885
2886 void InstructionCodeGeneratorX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2887 codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
2888 }
2889
2890 void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) {
2891 ret->SetLocations(nullptr);
2892 }
2893
2894 void InstructionCodeGeneratorX86_64::VisitReturnVoid([[maybe_unused]] HReturnVoid* ret) {
2895 codegen_->GenerateFrameExit();
2896 }
2897
2898 void LocationsBuilderX86_64::VisitReturn(HReturn* ret) {
2899 LocationSummary* locations =
2900 new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall);
2901 SetInForReturnValue(ret, locations);
2902 }
2903
2904 void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) {
2905 switch (ret->InputAt(0)->GetType()) {
2906 case DataType::Type::kReference:
2907 case DataType::Type::kBool:
2908 case DataType::Type::kUint8:
2909 case DataType::Type::kInt8:
2910 case DataType::Type::kUint16:
2911 case DataType::Type::kInt16:
2912 case DataType::Type::kInt32:
2913 case DataType::Type::kInt64:
2914 DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<CpuRegister>().AsRegister(), RAX);
2915 break;
2916
2917 case DataType::Type::kFloat32: {
2918 DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(),
2919 XMM0);
2920 // To simplify callers of an OSR method, we put the return value in both
2921 // a floating point and a core register.
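      // With is64bit=false this is a 32-bit movd, so the raw bits of the float in
      // XMM0 end up in EAX; the kFloat64 case below uses the 64-bit form (movq).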
2922 if (GetGraph()->IsCompilingOsr()) {
2923 __ movd(CpuRegister(RAX), XmmRegister(XMM0), /* is64bit= */ false);
2924 }
2925 break;
2926 }
2927 case DataType::Type::kFloat64: {
2928 DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(),
2929 XMM0);
2930 // To simplify callers of an OSR method, we put the return value in both
2931 // a floating point and a core register.
2932 if (GetGraph()->IsCompilingOsr()) {
2933 __ movd(CpuRegister(RAX), XmmRegister(XMM0), /* is64bit= */ true);
2934 }
2935 break;
2936 }
2937
2938 default:
2939 LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
2940 }
2941 codegen_->GenerateFrameExit();
2942 }
2943
2944 Location InvokeDexCallingConventionVisitorX86_64::GetReturnLocation(DataType::Type type) const {
2945 switch (type) {
2946 case DataType::Type::kReference:
2947 case DataType::Type::kBool:
2948 case DataType::Type::kUint8:
2949 case DataType::Type::kInt8:
2950 case DataType::Type::kUint16:
2951 case DataType::Type::kInt16:
2952 case DataType::Type::kUint32:
2953 case DataType::Type::kInt32:
2954 case DataType::Type::kUint64:
2955 case DataType::Type::kInt64:
2956 return Location::RegisterLocation(RAX);
2957
2958 case DataType::Type::kVoid:
2959 return Location::NoLocation();
2960
2961 case DataType::Type::kFloat64:
2962 case DataType::Type::kFloat32:
2963 return Location::FpuRegisterLocation(XMM0);
2964 }
2965 }
2966
2967 Location InvokeDexCallingConventionVisitorX86_64::GetMethodLocation() const {
2968 return Location::RegisterLocation(kMethodRegisterArgument);
2969 }
2970
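// Argument assignment sketch for the managed ABI implemented below (assuming the
// usual x86-64 managed parameter registers: RSI, RDX, RCX, R8, R9 for GPR arguments
// and XMM0-XMM7 for FP arguments, with RDI holding the ArtMethod*). For example,
// (int a, long b, float c, double d) maps to a->RSI, b->RDX, c->XMM0, d->XMM1.
// Remaining arguments go to stack slots; stack_index_ counts 32-bit vreg slots,
// hence the +2 steps for 64-bit values.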
2971 Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(DataType::Type type) {
2972 switch (type) {
2973 case DataType::Type::kReference:
2974 case DataType::Type::kBool:
2975 case DataType::Type::kUint8:
2976 case DataType::Type::kInt8:
2977 case DataType::Type::kUint16:
2978 case DataType::Type::kInt16:
2979 case DataType::Type::kInt32: {
2980 uint32_t index = gp_index_++;
2981 stack_index_++;
2982 if (index < calling_convention.GetNumberOfRegisters()) {
2983 return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
2984 } else {
2985 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
2986 }
2987 }
2988
2989 case DataType::Type::kInt64: {
2990 uint32_t index = gp_index_;
2991 stack_index_ += 2;
2992 if (index < calling_convention.GetNumberOfRegisters()) {
2993 gp_index_ += 1;
2994 return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
2995 } else {
2996 gp_index_ += 2;
2997 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
2998 }
2999 }
3000
3001 case DataType::Type::kFloat32: {
3002 uint32_t index = float_index_++;
3003 stack_index_++;
3004 if (index < calling_convention.GetNumberOfFpuRegisters()) {
3005 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
3006 } else {
3007 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
3008 }
3009 }
3010
3011 case DataType::Type::kFloat64: {
3012 uint32_t index = float_index_++;
3013 stack_index_ += 2;
3014 if (index < calling_convention.GetNumberOfFpuRegisters()) {
3015 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
3016 } else {
3017 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
3018 }
3019 }
3020
3021 case DataType::Type::kUint32:
3022 case DataType::Type::kUint64:
3023 case DataType::Type::kVoid:
3024 LOG(FATAL) << "Unexpected parameter type " << type;
3025 UNREACHABLE();
3026 }
3027 return Location::NoLocation();
3028 }
3029
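// For @CriticalNative calls, arguments follow the native ABI directly: integer
// arguments in RDI, RSI, RDX, RCX, R8, R9 (RDI is an ordinary argument here rather
// than the method register), floating-point arguments in XMM0-XMM7, and the rest in
// 8-byte stack slots (kFramePointerSize each), as the code below implements.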
3030 Location CriticalNativeCallingConventionVisitorX86_64::GetNextLocation(DataType::Type type) {
3031 DCHECK_NE(type, DataType::Type::kReference);
3032
3033 Location location = Location::NoLocation();
3034 if (DataType::IsFloatingPointType(type)) {
3035 if (fpr_index_ < kParameterFloatRegistersLength) {
3036 location = Location::FpuRegisterLocation(kParameterFloatRegisters[fpr_index_]);
3037 ++fpr_index_;
3038 }
3039 } else {
3040 // Native ABI uses the same registers as managed, except that the method register RDI
3041 // is a normal argument.
3042 if (gpr_index_ < 1u + kParameterCoreRegistersLength) {
3043 location = Location::RegisterLocation(
3044 gpr_index_ == 0u ? RDI : kParameterCoreRegisters[gpr_index_ - 1u]);
3045 ++gpr_index_;
3046 }
3047 }
3048 if (location.IsInvalid()) {
3049 if (DataType::Is64BitType(type)) {
3050 location = Location::DoubleStackSlot(stack_offset_);
3051 } else {
3052 location = Location::StackSlot(stack_offset_);
3053 }
3054 stack_offset_ += kFramePointerSize;
3055
3056 if (for_register_allocation_) {
3057 location = Location::Any();
3058 }
3059 }
3060 return location;
3061 }
3062
3063 Location CriticalNativeCallingConventionVisitorX86_64::GetReturnLocation(DataType::Type type)
3064 const {
3065 // We perform conversion to the managed ABI return register after the call if needed.
3066 InvokeDexCallingConventionVisitorX86_64 dex_calling_convention;
3067 return dex_calling_convention.GetReturnLocation(type);
3068 }
3069
3070 Location CriticalNativeCallingConventionVisitorX86_64::GetMethodLocation() const {
3071 // Pass the method in the hidden argument RAX.
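  // RAX is not an argument register in the native ABI (for non-variadic calls it is
  // unused on entry), so it can carry the target ArtMethod* without clobbering any
  // argument set up for the call.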
3072 return Location::RegisterLocation(RAX);
3073 }
3074
3075 void LocationsBuilderX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
3076 // The trampoline uses the same calling convention as a regular dex call,
3077 // except that instead of loading arg0/r0 with the target Method*, arg0/r0
3078 // contains the method_idx.
3079 HandleInvoke(invoke);
3080 }
3081
3082 void InstructionCodeGeneratorX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
3083 codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
3084 }
3085
3086 void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
3087 // Explicit clinit checks triggered by static invokes must have been pruned by
3088 // art::PrepareForRegisterAllocation.
3089 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
3090
3091 IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
3092 if (intrinsic.TryDispatch(invoke)) {
3093 return;
3094 }
3095
3096 if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
3097 CriticalNativeCallingConventionVisitorX86_64 calling_convention_visitor(
3098 /*for_register_allocation=*/ true);
3099 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
3100 CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(invoke->GetLocations());
3101 } else {
3102 HandleInvoke(invoke);
3103 }
3104 }
3105
3106 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
3107 if (invoke->GetLocations()->Intrinsified()) {
3108 IntrinsicCodeGeneratorX86_64 intrinsic(codegen);
3109 intrinsic.Dispatch(invoke);
3110 return true;
3111 }
3112 return false;
3113 }
3114
3115 void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
3116 // Explicit clinit checks triggered by static invokes must have been pruned by
3117 // art::PrepareForRegisterAllocation.
3118 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
3119
3120 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
3121 return;
3122 }
3123
3124 LocationSummary* locations = invoke->GetLocations();
3125 codegen_->GenerateStaticOrDirectCall(
3126 invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
3127 }
3128
3129 void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) {
3130 InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
3131 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
3132 }
3133
3134 void LocationsBuilderX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
3135 IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
3136 if (intrinsic.TryDispatch(invoke)) {
3137 return;
3138 }
3139
3140 HandleInvoke(invoke);
3141 }
3142
3143 void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
3144 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
3145 return;
3146 }
3147
3148 codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
3149 DCHECK(!codegen_->IsLeafMethod());
3150 }
3151
3152 void LocationsBuilderX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
3153 HandleInvoke(invoke);
3154 // Add the hidden argument.
3155 if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
3156 invoke->GetLocations()->SetInAt(invoke->GetNumberOfArguments() - 1,
3157 Location::RegisterLocation(RAX));
3158 }
3159 invoke->GetLocations()->AddTemp(Location::RegisterLocation(RAX));
3160 }
3161
3162 void CodeGeneratorX86_64::MaybeGenerateInlineCacheCheck(HInstruction* instruction,
3163 CpuRegister klass) {
3164 DCHECK_EQ(RDI, klass.AsRegister());
3165 if (ProfilingInfoBuilder::IsInlineCacheUseful(instruction->AsInvoke(), this)) {
3166 ProfilingInfo* info = GetGraph()->GetProfilingInfo();
3167 DCHECK(info != nullptr);
3168 InlineCache* cache = ProfilingInfoBuilder::GetInlineCache(
3169 info, GetCompilerOptions(), instruction->AsInvoke());
3170 if (cache != nullptr) {
3171 uint64_t address = reinterpret_cast64<uint64_t>(cache);
3172 NearLabel done;
3173 __ movq(CpuRegister(TMP), Immediate(address));
3174 // Fast path for a monomorphic cache.
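      // Illustrative shape of the emitted check (TMP holds the InlineCache address):
      //   movq TMP, #cache
      //   cmpl [TMP + classes_ offset], klass   // klass is the receiver class, in RDI
      //   je done                               // monomorphic hit: nothing to record
      //   <call the UpdateInlineCache entrypoint via the current Thread>  // cache miss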
3175 __ cmpl(Address(CpuRegister(TMP), InlineCache::ClassesOffset().Int32Value()), klass);
3176 __ j(kEqual, &done);
3177 GenerateInvokeRuntime(
3178 GetThreadOffset<kX86_64PointerSize>(kQuickUpdateInlineCache).Int32Value());
3179 __ Bind(&done);
3180 } else {
3181 // This is unexpected, but we don't guarantee stable compilation across
3182 // JIT runs, so just warn about it.
3183 ScopedObjectAccess soa(Thread::Current());
3184 LOG(WARNING) << "Missing inline cache for " << GetGraph()->GetArtMethod()->PrettyMethod();
3185 }
3186 }
3187 }
3188
3189 void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
3190 // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
3191 LocationSummary* locations = invoke->GetLocations();
3192 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
3193 Location receiver = locations->InAt(0);
3194 size_t class_offset = mirror::Object::ClassOffset().SizeValue();
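  // Overall shape of the interface dispatch emitted below (sketch):
  //   movl temp, [receiver + class_offset]   // load the receiver's Class*
  //   movq temp, [temp + ImtPtrOffset]       // load the class's ImTable*
  //   movq temp, [temp + imt slot offset]    // load the ArtMethod* (or conflict method)
  //   call [temp + entry point offset]
  // with RAX carrying the hidden interface-method argument used for conflict resolution.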
3195
3196 if (receiver.IsStackSlot()) {
3197 __ movl(temp, Address(CpuRegister(RSP), receiver.GetStackIndex()));
3198 // /* HeapReference<Class> */ temp = temp->klass_
3199 __ movl(temp, Address(temp, class_offset));
3200 } else {
3201 // /* HeapReference<Class> */ temp = receiver->klass_
3202 __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset));
3203 }
3204 codegen_->MaybeRecordImplicitNullCheck(invoke);
3205 // Instead of simply (possibly) unpoisoning `temp` here, we should
3206 // emit a read barrier for the previous class reference load.
3207 // However, this is not required in practice, as this is an
3208 // intermediate/temporary reference and because the current
3209 // concurrent copying collector keeps the from-space memory
3210 // intact/accessible until the end of the marking phase (although
3211 // a future collector may not).
3212 __ MaybeUnpoisonHeapReference(temp);
3213
3214 codegen_->MaybeGenerateInlineCacheCheck(invoke, temp);
3215
3216 if (invoke->GetHiddenArgumentLoadKind() != MethodLoadKind::kRecursive &&
3217 invoke->GetHiddenArgumentLoadKind() != MethodLoadKind::kRuntimeCall) {
3218 Location hidden_reg = locations->GetTemp(1);
3219 // Set the hidden argument. It is safe to do this here, as RAX
3220 // won't be modified thereafter, before the `call` instruction.
3221 // We also do it after MaybeGenerateInlineCacheCheck, which may use RAX.
3222 DCHECK_EQ(RAX, hidden_reg.AsRegister<Register>());
3223 codegen_->LoadMethod(invoke->GetHiddenArgumentLoadKind(), hidden_reg, invoke);
3224 }
3225
3226 // temp = temp->GetAddressOfIMT()
3227 __ movq(temp,
3228 Address(temp, mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
3230 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
3231 invoke->GetImtIndex(), kX86_64PointerSize));
3232 // temp = temp->GetImtEntryAt(method_offset);
3233 __ movq(temp, Address(temp, method_offset));
3234 if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRuntimeCall) {
3235 // We pass the method from the IMT in case of a conflict. This will ensure
3236 // we go into the runtime to resolve the actual method.
3237 Location hidden_reg = locations->GetTemp(1);
3238 __ movq(hidden_reg.AsRegister<CpuRegister>(), temp);
3239 }
3240 // call temp->GetEntryPoint();
3241 __ call(Address(
3242 temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64PointerSize).SizeValue()));
3243
3244 DCHECK(!codegen_->IsLeafMethod());
3245 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
3246 }
3247
3248 void LocationsBuilderX86_64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
3249 IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
3250 if (intrinsic.TryDispatch(invoke)) {
3251 return;
3252 }
3253 HandleInvoke(invoke);
3254 }
3255
3256 void InstructionCodeGeneratorX86_64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
3257 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
3258 return;
3259 }
3260 codegen_->GenerateInvokePolymorphicCall(invoke);
3261 }
3262
3263 void LocationsBuilderX86_64::VisitInvokeCustom(HInvokeCustom* invoke) {
3264 HandleInvoke(invoke);
3265 }
3266
3267 void InstructionCodeGeneratorX86_64::VisitInvokeCustom(HInvokeCustom* invoke) {
3268 codegen_->GenerateInvokeCustomCall(invoke);
3269 }
3270
3271 void LocationsBuilderX86_64::VisitNeg(HNeg* neg) {
3272 LocationSummary* locations =
3273 new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
3274 switch (neg->GetResultType()) {
3275 case DataType::Type::kInt32:
3276 case DataType::Type::kInt64:
3277 locations->SetInAt(0, Location::RequiresRegister());
3278 locations->SetOut(Location::SameAsFirstInput());
3279 break;
3280
3281 case DataType::Type::kFloat32:
3282 case DataType::Type::kFloat64:
3283 locations->SetInAt(0, Location::RequiresFpuRegister());
3284 locations->SetOut(Location::SameAsFirstInput());
3285 locations->AddTemp(Location::RequiresFpuRegister());
3286 break;
3287
3288 default:
3289 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
3290 }
3291 }
3292
3293 void InstructionCodeGeneratorX86_64::VisitNeg(HNeg* neg) {
3294 LocationSummary* locations = neg->GetLocations();
3295 Location out = locations->Out();
3296 Location in = locations->InAt(0);
3297 switch (neg->GetResultType()) {
3298 case DataType::Type::kInt32:
3299 DCHECK(in.IsRegister());
3300 DCHECK(in.Equals(out));
3301 __ negl(out.AsRegister<CpuRegister>());
3302 break;
3303
3304 case DataType::Type::kInt64:
3305 DCHECK(in.IsRegister());
3306 DCHECK(in.Equals(out));
3307 __ negq(out.AsRegister<CpuRegister>());
3308 break;
3309
3310 case DataType::Type::kFloat32: {
3311 DCHECK(in.Equals(out));
3312 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
3313 // Implement float negation with an exclusive or with value
3314 // 0x80000000 (mask for bit 31, representing the sign of a
3315 // single-precision floating-point number).
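      // For example, 1.0f is 0x3F800000; XORing with 0x80000000 yields 0xBF800000,
      // which is -1.0f. Unlike `0 - x`, this also negates -0.0f and NaNs correctly,
      // since only the sign bit changes.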
3316 __ movss(mask, codegen_->LiteralInt32Address(0x80000000));
3317 __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
3318 break;
3319 }
3320
3321 case DataType::Type::kFloat64: {
3322 DCHECK(in.Equals(out));
3323 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
3324 // Implement double negation with an exclusive or with value
3325 // 0x8000000000000000 (mask for bit 63, representing the sign of
3326 // a double-precision floating-point number).
3327 __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000)));
3328 __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
3329 break;
3330 }
3331
3332 default:
3333 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
3334 }
3335 }
3336
3337 void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) {
3338 LocationSummary* locations =
3339 new (GetGraph()->GetAllocator()) LocationSummary(conversion, LocationSummary::kNoCall);
3340 DataType::Type result_type = conversion->GetResultType();
3341 DataType::Type input_type = conversion->GetInputType();
3342 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
3343 << input_type << " -> " << result_type;
3344
3345 switch (result_type) {
3346 case DataType::Type::kUint8:
3347 case DataType::Type::kInt8:
3348 case DataType::Type::kUint16:
3349 case DataType::Type::kInt16:
3350 DCHECK(DataType::IsIntegralType(input_type)) << input_type;
3351 locations->SetInAt(0, Location::Any());
3352 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3353 break;
3354
3355 case DataType::Type::kInt32:
3356 switch (input_type) {
3357 case DataType::Type::kInt64:
3358 locations->SetInAt(0, Location::Any());
3359 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3360 break;
3361
3362 case DataType::Type::kFloat32:
3363 locations->SetInAt(0, Location::RequiresFpuRegister());
3364 locations->SetOut(Location::RequiresRegister());
3365 break;
3366
3367 case DataType::Type::kFloat64:
3368 locations->SetInAt(0, Location::RequiresFpuRegister());
3369 locations->SetOut(Location::RequiresRegister());
3370 break;
3371
3372 default:
3373 LOG(FATAL) << "Unexpected type conversion from " << input_type
3374 << " to " << result_type;
3375 }
3376 break;
3377
3378 case DataType::Type::kInt64:
3379 switch (input_type) {
3380 case DataType::Type::kBool:
3381 case DataType::Type::kUint8:
3382 case DataType::Type::kInt8:
3383 case DataType::Type::kUint16:
3384 case DataType::Type::kInt16:
3385 case DataType::Type::kInt32:
3386 // TODO: We would benefit from a (to-be-implemented)
3387 // Location::RegisterOrStackSlot requirement for this input.
3388 locations->SetInAt(0, Location::RequiresRegister());
3389 locations->SetOut(Location::RequiresRegister());
3390 break;
3391
3392 case DataType::Type::kFloat32:
3393 locations->SetInAt(0, Location::RequiresFpuRegister());
3394 locations->SetOut(Location::RequiresRegister());
3395 break;
3396
3397 case DataType::Type::kFloat64:
3398 locations->SetInAt(0, Location::RequiresFpuRegister());
3399 locations->SetOut(Location::RequiresRegister());
3400 break;
3401
3402 default:
3403 LOG(FATAL) << "Unexpected type conversion from " << input_type
3404 << " to " << result_type;
3405 }
3406 break;
3407
3408 case DataType::Type::kFloat32:
3409 switch (input_type) {
3410 case DataType::Type::kBool:
3411 case DataType::Type::kUint8:
3412 case DataType::Type::kInt8:
3413 case DataType::Type::kUint16:
3414 case DataType::Type::kInt16:
3415 case DataType::Type::kInt32:
3416 locations->SetInAt(0, Location::Any());
3417 locations->SetOut(Location::RequiresFpuRegister());
3418 break;
3419
3420 case DataType::Type::kInt64:
3421 locations->SetInAt(0, Location::Any());
3422 locations->SetOut(Location::RequiresFpuRegister());
3423 break;
3424
3425 case DataType::Type::kFloat64:
3426 locations->SetInAt(0, Location::Any());
3427 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3428 break;
3429
3430 default:
3431 LOG(FATAL) << "Unexpected type conversion from " << input_type
3432 << " to " << result_type;
3433 }
3434 break;
3435
3436 case DataType::Type::kFloat64:
3437 switch (input_type) {
3438 case DataType::Type::kBool:
3439 case DataType::Type::kUint8:
3440 case DataType::Type::kInt8:
3441 case DataType::Type::kUint16:
3442 case DataType::Type::kInt16:
3443 case DataType::Type::kInt32:
3444 locations->SetInAt(0, Location::Any());
3445 locations->SetOut(Location::RequiresFpuRegister());
3446 break;
3447
3448 case DataType::Type::kInt64:
3449 locations->SetInAt(0, Location::Any());
3450 locations->SetOut(Location::RequiresFpuRegister());
3451 break;
3452
3453 case DataType::Type::kFloat32:
3454 locations->SetInAt(0, Location::Any());
3455 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3456 break;
3457
3458 default:
3459 LOG(FATAL) << "Unexpected type conversion from " << input_type
3460 << " to " << result_type;
3461 }
3462 break;
3463
3464 default:
3465 LOG(FATAL) << "Unexpected type conversion from " << input_type
3466 << " to " << result_type;
3467 }
3468 }
3469
3470 void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conversion) {
3471 LocationSummary* locations = conversion->GetLocations();
3472 Location out = locations->Out();
3473 Location in = locations->InAt(0);
3474 DataType::Type result_type = conversion->GetResultType();
3475 DataType::Type input_type = conversion->GetInputType();
3476 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
3477 << input_type << " -> " << result_type;
3478 switch (result_type) {
3479 case DataType::Type::kUint8:
3480 switch (input_type) {
3481 case DataType::Type::kInt8:
3482 case DataType::Type::kUint16:
3483 case DataType::Type::kInt16:
3484 case DataType::Type::kInt32:
3485 case DataType::Type::kInt64:
3486 if (in.IsRegister()) {
3487 __ movzxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3488 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
3489 __ movzxb(out.AsRegister<CpuRegister>(),
3490 Address(CpuRegister(RSP), in.GetStackIndex()));
3491 } else {
3492 __ movl(out.AsRegister<CpuRegister>(),
3493 Immediate(static_cast<uint8_t>(Int64FromConstant(in.GetConstant()))));
3494 }
3495 break;
3496
3497 default:
3498 LOG(FATAL) << "Unexpected type conversion from " << input_type
3499 << " to " << result_type;
3500 }
3501 break;
3502
3503 case DataType::Type::kInt8:
3504 switch (input_type) {
3505 case DataType::Type::kUint8:
3506 case DataType::Type::kUint16:
3507 case DataType::Type::kInt16:
3508 case DataType::Type::kInt32:
3509 case DataType::Type::kInt64:
3510 if (in.IsRegister()) {
3511 __ movsxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3512 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
3513 __ movsxb(out.AsRegister<CpuRegister>(),
3514 Address(CpuRegister(RSP), in.GetStackIndex()));
3515 } else {
3516 __ movl(out.AsRegister<CpuRegister>(),
3517 Immediate(static_cast<int8_t>(Int64FromConstant(in.GetConstant()))));
3518 }
3519 break;
3520
3521 default:
3522 LOG(FATAL) << "Unexpected type conversion from " << input_type
3523 << " to " << result_type;
3524 }
3525 break;
3526
3527 case DataType::Type::kUint16:
3528 switch (input_type) {
3529 case DataType::Type::kInt8:
3530 case DataType::Type::kInt16:
3531 case DataType::Type::kInt32:
3532 case DataType::Type::kInt64:
3533 if (in.IsRegister()) {
3534 __ movzxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3535 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
3536 __ movzxw(out.AsRegister<CpuRegister>(),
3537 Address(CpuRegister(RSP), in.GetStackIndex()));
3538 } else {
3539 __ movl(out.AsRegister<CpuRegister>(),
3540 Immediate(static_cast<uint16_t>(Int64FromConstant(in.GetConstant()))));
3541 }
3542 break;
3543
3544 default:
3545 LOG(FATAL) << "Unexpected type conversion from " << input_type
3546 << " to " << result_type;
3547 }
3548 break;
3549
3550 case DataType::Type::kInt16:
3551 switch (input_type) {
3552 case DataType::Type::kUint16:
3553 case DataType::Type::kInt32:
3554 case DataType::Type::kInt64:
3555 if (in.IsRegister()) {
3556 __ movsxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3557 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
3558 __ movsxw(out.AsRegister<CpuRegister>(),
3559 Address(CpuRegister(RSP), in.GetStackIndex()));
3560 } else {
3561 __ movl(out.AsRegister<CpuRegister>(),
3562 Immediate(static_cast<int16_t>(Int64FromConstant(in.GetConstant()))));
3563 }
3564 break;
3565
3566 default:
3567 LOG(FATAL) << "Unexpected type conversion from " << input_type
3568 << " to " << result_type;
3569 }
3570 break;
3571
3572 case DataType::Type::kInt32:
3573 switch (input_type) {
3574 case DataType::Type::kInt64:
3575 if (in.IsRegister()) {
3576 __ movl(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3577 } else if (in.IsDoubleStackSlot()) {
3578 __ movl(out.AsRegister<CpuRegister>(),
3579 Address(CpuRegister(RSP), in.GetStackIndex()));
3580 } else {
3581 DCHECK(in.IsConstant());
3582 DCHECK(in.GetConstant()->IsLongConstant());
3583 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3584 __ movl(out.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
3585 }
3586 break;
3587
3588 case DataType::Type::kFloat32: {
3589 XmmRegister input = in.AsFpuRegister<XmmRegister>();
3590 CpuRegister output = out.AsRegister<CpuRegister>();
3591 NearLabel done, nan;
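          // Java float->int semantics handled here: NaN -> 0 and values >= 2^31 ->
          // INT_MAX both need explicit tests, while values below INT_MIN need none,
          // because cvttss2si already returns 0x80000000 (INT_MIN) for any
          // out-of-range input.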
3592
3593 __ movl(output, Immediate(kPrimIntMax));
3594 // if input >= (float)INT_MAX goto done
3595 __ comiss(input, codegen_->LiteralFloatAddress(static_cast<float>(kPrimIntMax)));
3596 __ j(kAboveEqual, &done);
3597 // if input == NaN goto nan
3598 __ j(kUnordered, &nan);
3599 // output = float-to-int-truncate(input)
3600 __ cvttss2si(output, input, false);
3601 __ jmp(&done);
3602 __ Bind(&nan);
3603 // output = 0
3604 __ xorl(output, output);
3605 __ Bind(&done);
3606 break;
3607 }
3608
3609 case DataType::Type::kFloat64: {
3610 XmmRegister input = in.AsFpuRegister<XmmRegister>();
3611 CpuRegister output = out.AsRegister<CpuRegister>();
3612 NearLabel done, nan;
3613
3614 __ movl(output, Immediate(kPrimIntMax));
3615 // if input >= (double)INT_MAX goto done
3616 __ comisd(input, codegen_->LiteralDoubleAddress(kPrimIntMax));
3617 __ j(kAboveEqual, &done);
3618 // if input == NaN goto nan
3619 __ j(kUnordered, &nan);
3620 // output = double-to-int-truncate(input)
3621 __ cvttsd2si(output, input);
3622 __ jmp(&done);
3623 __ Bind(&nan);
3624 // output = 0
3625 __ xorl(output, output);
3626 __ Bind(&done);
3627 break;
3628 }
3629
3630 default:
3631 LOG(FATAL) << "Unexpected type conversion from " << input_type
3632 << " to " << result_type;
3633 }
3634 break;
3635
3636 case DataType::Type::kInt64:
3637 DCHECK(out.IsRegister());
3638 switch (input_type) {
3639 case DataType::Type::kBool:
3640 case DataType::Type::kUint8:
3641 case DataType::Type::kInt8:
3642 case DataType::Type::kUint16:
3643 case DataType::Type::kInt16:
3644 case DataType::Type::kInt32:
3645 DCHECK(in.IsRegister());
3646 __ movsxd(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3647 break;
3648
3649 case DataType::Type::kFloat32: {
3650 XmmRegister input = in.AsFpuRegister<XmmRegister>();
3651 CpuRegister output = out.AsRegister<CpuRegister>();
3652 NearLabel done, nan;
3653
3654 codegen_->Load64BitValue(output, kPrimLongMax);
3655 // if input >= (float)LONG_MAX goto done
3656 __ comiss(input, codegen_->LiteralFloatAddress(static_cast<float>(kPrimLongMax)));
3657 __ j(kAboveEqual, &done);
3658 // if input == NaN goto nan
3659 __ j(kUnordered, &nan);
3660 // output = float-to-long-truncate(input)
3661 __ cvttss2si(output, input, true);
3662 __ jmp(&done);
3663 __ Bind(&nan);
3664 // output = 0
3665 __ xorl(output, output);
3666 __ Bind(&done);
3667 break;
3668 }
3669
3670 case DataType::Type::kFloat64: {
3671 XmmRegister input = in.AsFpuRegister<XmmRegister>();
3672 CpuRegister output = out.AsRegister<CpuRegister>();
3673 NearLabel done, nan;
3674
3675 codegen_->Load64BitValue(output, kPrimLongMax);
3676 // if input >= (double)LONG_MAX goto done
3677 __ comisd(input, codegen_->LiteralDoubleAddress(
3678 static_cast<double>(kPrimLongMax)));
3679 __ j(kAboveEqual, &done);
3680 // if input == NaN goto nan
3681 __ j(kUnordered, &nan);
3682 // output = double-to-long-truncate(input)
3683 __ cvttsd2si(output, input, true);
3684 __ jmp(&done);
3685 __ Bind(&nan);
3686 // output = 0
3687 __ xorl(output, output);
3688 __ Bind(&done);
3689 break;
3690 }
3691
3692 default:
3693 LOG(FATAL) << "Unexpected type conversion from " << input_type
3694 << " to " << result_type;
3695 }
3696 break;
3697
3698 case DataType::Type::kFloat32:
3699 switch (input_type) {
3700 case DataType::Type::kBool:
3701 case DataType::Type::kUint8:
3702 case DataType::Type::kInt8:
3703 case DataType::Type::kUint16:
3704 case DataType::Type::kInt16:
3705 case DataType::Type::kInt32:
3706 if (in.IsRegister()) {
3707 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
3708 } else if (in.IsConstant()) {
3709 int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
3710 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3711 codegen_->Load32BitValue(dest, static_cast<float>(v));
3712 } else {
3713 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
3714 Address(CpuRegister(RSP), in.GetStackIndex()), false);
3715 }
3716 break;
3717
3718 case DataType::Type::kInt64:
3719 if (in.IsRegister()) {
3720 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
3721 } else if (in.IsConstant()) {
3722 int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
3723 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3724 codegen_->Load32BitValue(dest, static_cast<float>(v));
3725 } else {
3726 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
3727 Address(CpuRegister(RSP), in.GetStackIndex()), true);
3728 }
3729 break;
3730
3731 case DataType::Type::kFloat64:
3732 if (in.IsFpuRegister()) {
3733 __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3734 } else if (in.IsConstant()) {
3735 double v = in.GetConstant()->AsDoubleConstant()->GetValue();
3736 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3737 codegen_->Load32BitValue(dest, static_cast<float>(v));
3738 } else {
3739 __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(),
3740 Address(CpuRegister(RSP), in.GetStackIndex()));
3741 }
3742 break;
3743
3744 default:
3745 LOG(FATAL) << "Unexpected type conversion from " << input_type
3746 << " to " << result_type;
3747 }
3748 break;
3749
3750 case DataType::Type::kFloat64:
3751 switch (input_type) {
3752 case DataType::Type::kBool:
3753 case DataType::Type::kUint8:
3754 case DataType::Type::kInt8:
3755 case DataType::Type::kUint16:
3756 case DataType::Type::kInt16:
3757 case DataType::Type::kInt32:
3758 if (in.IsRegister()) {
3759 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
3760 } else if (in.IsConstant()) {
3761 int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
3762 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3763 codegen_->Load64BitValue(dest, static_cast<double>(v));
3764 } else {
3765 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
3766 Address(CpuRegister(RSP), in.GetStackIndex()), false);
3767 }
3768 break;
3769
3770 case DataType::Type::kInt64:
3771 if (in.IsRegister()) {
3772 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
3773 } else if (in.IsConstant()) {
3774 int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
3775 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3776 codegen_->Load64BitValue(dest, static_cast<double>(v));
3777 } else {
3778 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
3779 Address(CpuRegister(RSP), in.GetStackIndex()), true);
3780 }
3781 break;
3782
3783 case DataType::Type::kFloat32:
3784 if (in.IsFpuRegister()) {
3785 __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3786 } else if (in.IsConstant()) {
3787 float v = in.GetConstant()->AsFloatConstant()->GetValue();
3788 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3789 codegen_->Load64BitValue(dest, static_cast<double>(v));
3790 } else {
3791 __ cvtss2sd(out.AsFpuRegister<XmmRegister>(),
3792 Address(CpuRegister(RSP), in.GetStackIndex()));
3793 }
3794 break;
3795
3796 default:
3797 LOG(FATAL) << "Unexpected type conversion from " << input_type
3798 << " to " << result_type;
3799 }
3800 break;
3801
3802 default:
3803 LOG(FATAL) << "Unexpected type conversion from " << input_type
3804 << " to " << result_type;
3805 }
3806 }
3807
3808 void LocationsBuilderX86_64::VisitAdd(HAdd* add) {
3809 LocationSummary* locations =
3810 new (GetGraph()->GetAllocator()) LocationSummary(add, LocationSummary::kNoCall);
3811 switch (add->GetResultType()) {
3812 case DataType::Type::kInt32: {
3813 locations->SetInAt(0, Location::RequiresRegister());
3814 locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
3815 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3816 break;
3817 }
3818
3819 case DataType::Type::kInt64: {
3820 locations->SetInAt(0, Location::RequiresRegister());
3821 // We can use a leaq or addq if the constant can fit in an immediate.
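      // E.g. when out == first:  addq out, #imm32
      //      when out != first:  leaq out, [first + imm32]   // leaves first untouched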
3822 locations->SetInAt(1, Location::RegisterOrInt32Constant(add->InputAt(1)));
3823 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3824 break;
3825 }
3826
3827 case DataType::Type::kFloat64:
3828 case DataType::Type::kFloat32: {
3829 locations->SetInAt(0, Location::RequiresFpuRegister());
3830 locations->SetInAt(1, Location::Any());
3831 locations->SetOut(Location::SameAsFirstInput());
3832 break;
3833 }
3834
3835 default:
3836 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3837 }
3838 }
3839
3840 void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) {
3841 LocationSummary* locations = add->GetLocations();
3842 Location first = locations->InAt(0);
3843 Location second = locations->InAt(1);
3844 Location out = locations->Out();
3845
3846 switch (add->GetResultType()) {
3847 case DataType::Type::kInt32: {
3848 if (second.IsRegister()) {
3849 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3850 __ addl(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3851 } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3852 __ addl(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
3853 } else {
3854 __ leal(out.AsRegister<CpuRegister>(), Address(
3855 first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
3856 }
3857 } else if (second.IsConstant()) {
3858 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3859 __ addl(out.AsRegister<CpuRegister>(),
3860 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
3861 } else {
3862 __ leal(out.AsRegister<CpuRegister>(), Address(
3863 first.AsRegister<CpuRegister>(), second.GetConstant()->AsIntConstant()->GetValue()));
3864 }
3865 } else {
3866 DCHECK(first.Equals(locations->Out()));
3867 __ addl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
3868 }
3869 break;
3870 }
3871
3872 case DataType::Type::kInt64: {
3873 if (second.IsRegister()) {
3874 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3875 __ addq(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3876 } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3877 __ addq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
3878 } else {
3879 __ leaq(out.AsRegister<CpuRegister>(), Address(
3880 first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
3881 }
3882 } else {
3883 DCHECK(second.IsConstant());
3884 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3885 int32_t int32_value = Low32Bits(value);
3886 DCHECK_EQ(int32_value, value);
3887 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3888 __ addq(out.AsRegister<CpuRegister>(), Immediate(int32_value));
3889 } else {
3890 __ leaq(out.AsRegister<CpuRegister>(), Address(
3891 first.AsRegister<CpuRegister>(), int32_value));
3892 }
3893 }
3894 break;
3895 }
3896
3897 case DataType::Type::kFloat32: {
3898 if (second.IsFpuRegister()) {
3899 __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3900 } else if (second.IsConstant()) {
3901 __ addss(first.AsFpuRegister<XmmRegister>(),
3902 codegen_->LiteralFloatAddress(
3903 second.GetConstant()->AsFloatConstant()->GetValue()));
3904 } else {
3905 DCHECK(second.IsStackSlot());
3906 __ addss(first.AsFpuRegister<XmmRegister>(),
3907 Address(CpuRegister(RSP), second.GetStackIndex()));
3908 }
3909 break;
3910 }
3911
3912 case DataType::Type::kFloat64: {
3913 if (second.IsFpuRegister()) {
3914 __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3915 } else if (second.IsConstant()) {
3916 __ addsd(first.AsFpuRegister<XmmRegister>(),
3917 codegen_->LiteralDoubleAddress(
3918 second.GetConstant()->AsDoubleConstant()->GetValue()));
3919 } else {
3920 DCHECK(second.IsDoubleStackSlot());
3921 __ addsd(first.AsFpuRegister<XmmRegister>(),
3922 Address(CpuRegister(RSP), second.GetStackIndex()));
3923 }
3924 break;
3925 }
3926
3927 default:
3928 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3929 }
3930 }
3931
3932 void LocationsBuilderX86_64::VisitSub(HSub* sub) {
3933 LocationSummary* locations =
3934 new (GetGraph()->GetAllocator()) LocationSummary(sub, LocationSummary::kNoCall);
3935 switch (sub->GetResultType()) {
3936 case DataType::Type::kInt32: {
3937 locations->SetInAt(0, Location::RequiresRegister());
3938 locations->SetInAt(1, Location::Any());
3939 locations->SetOut(Location::SameAsFirstInput());
3940 break;
3941 }
3942 case DataType::Type::kInt64: {
3943 locations->SetInAt(0, Location::RequiresRegister());
3944 locations->SetInAt(1, Location::RegisterOrInt32Constant(sub->InputAt(1)));
3945 locations->SetOut(Location::SameAsFirstInput());
3946 break;
3947 }
3948 case DataType::Type::kFloat32:
3949 case DataType::Type::kFloat64: {
3950 locations->SetInAt(0, Location::RequiresFpuRegister());
3951 locations->SetInAt(1, Location::Any());
3952 locations->SetOut(Location::SameAsFirstInput());
3953 break;
3954 }
3955 default:
3956 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3957 }
3958 }
3959
3960 void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) {
3961 LocationSummary* locations = sub->GetLocations();
3962 Location first = locations->InAt(0);
3963 Location second = locations->InAt(1);
3964 DCHECK(first.Equals(locations->Out()));
3965 switch (sub->GetResultType()) {
3966 case DataType::Type::kInt32: {
3967 if (second.IsRegister()) {
3968 __ subl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3969 } else if (second.IsConstant()) {
3970 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
3971 __ subl(first.AsRegister<CpuRegister>(), imm);
3972 } else {
3973 __ subl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
3974 }
3975 break;
3976 }
3977 case DataType::Type::kInt64: {
3978 if (second.IsConstant()) {
3979 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3980 DCHECK(IsInt<32>(value));
3981 __ subq(first.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
3982 } else {
3983 __ subq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3984 }
3985 break;
3986 }
3987
3988 case DataType::Type::kFloat32: {
3989 if (second.IsFpuRegister()) {
3990 __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3991 } else if (second.IsConstant()) {
3992 __ subss(first.AsFpuRegister<XmmRegister>(),
3993 codegen_->LiteralFloatAddress(
3994 second.GetConstant()->AsFloatConstant()->GetValue()));
3995 } else {
3996 DCHECK(second.IsStackSlot());
3997 __ subss(first.AsFpuRegister<XmmRegister>(),
3998 Address(CpuRegister(RSP), second.GetStackIndex()));
3999 }
4000 break;
4001 }
4002
4003 case DataType::Type::kFloat64: {
4004 if (second.IsFpuRegister()) {
4005 __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4006 } else if (second.IsConstant()) {
4007 __ subsd(first.AsFpuRegister<XmmRegister>(),
4008 codegen_->LiteralDoubleAddress(
4009 second.GetConstant()->AsDoubleConstant()->GetValue()));
4010 } else {
4011 DCHECK(second.IsDoubleStackSlot());
4012 __ subsd(first.AsFpuRegister<XmmRegister>(),
4013 Address(CpuRegister(RSP), second.GetStackIndex()));
4014 }
4015 break;
4016 }
4017
4018 default:
4019 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
4020 }
4021 }
4022
4023 void LocationsBuilderX86_64::VisitMul(HMul* mul) {
4024 LocationSummary* locations =
4025 new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
4026 switch (mul->GetResultType()) {
4027 case DataType::Type::kInt32: {
4028 locations->SetInAt(0, Location::RequiresRegister());
4029 locations->SetInAt(1, Location::Any());
4030 if (mul->InputAt(1)->IsIntConstant()) {
4031 // Can use 3 operand multiply.
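        // i.e. imull out, first, #imm writes `first * imm` straight into out, so the
        // output register does not have to alias the first input.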
4032 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4033 } else {
4034 locations->SetOut(Location::SameAsFirstInput());
4035 }
4036 break;
4037 }
4038 case DataType::Type::kInt64: {
4039 locations->SetInAt(0, Location::RequiresRegister());
4040 locations->SetInAt(1, Location::Any());
4041 if (mul->InputAt(1)->IsLongConstant() &&
4042 IsInt<32>(mul->InputAt(1)->AsLongConstant()->GetValue())) {
4043 // Can use 3 operand multiply.
4044 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4045 } else {
4046 locations->SetOut(Location::SameAsFirstInput());
4047 }
4048 break;
4049 }
4050 case DataType::Type::kFloat32:
4051 case DataType::Type::kFloat64: {
4052 locations->SetInAt(0, Location::RequiresFpuRegister());
4053 locations->SetInAt(1, Location::Any());
4054 locations->SetOut(Location::SameAsFirstInput());
4055 break;
4056 }
4057
4058 default:
4059 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
4060 }
4061 }
4062
4063 void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) {
4064 LocationSummary* locations = mul->GetLocations();
4065 Location first = locations->InAt(0);
4066 Location second = locations->InAt(1);
4067 Location out = locations->Out();
4068 switch (mul->GetResultType()) {
4069 case DataType::Type::kInt32:
4070 // The constant may have ended up in a register, so test explicitly to avoid
4071 // problems where the output may not be the same as the first operand.
4072 if (mul->InputAt(1)->IsIntConstant()) {
4073 Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
4074 __ imull(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), imm);
4075 } else if (second.IsRegister()) {
4076 DCHECK(first.Equals(out));
4077 __ imull(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
4078 } else {
4079 DCHECK(first.Equals(out));
4080 DCHECK(second.IsStackSlot());
4081 __ imull(first.AsRegister<CpuRegister>(),
4082 Address(CpuRegister(RSP), second.GetStackIndex()));
4083 }
4084 break;
4085 case DataType::Type::kInt64: {
4086 // The constant may have ended up in a register, so test explicitly to avoid
4087 // problems where the output may not be the same as the first operand.
4088 if (mul->InputAt(1)->IsLongConstant()) {
4089 int64_t value = mul->InputAt(1)->AsLongConstant()->GetValue();
4090 if (IsInt<32>(value)) {
4091 __ imulq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(),
4092 Immediate(static_cast<int32_t>(value)));
4093 } else {
4094 // Have to use the constant area.
4095 DCHECK(first.Equals(out));
4096 __ imulq(first.AsRegister<CpuRegister>(), codegen_->LiteralInt64Address(value));
4097 }
4098 } else if (second.IsRegister()) {
4099 DCHECK(first.Equals(out));
4100 __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
4101 } else {
4102 DCHECK(second.IsDoubleStackSlot());
4103 DCHECK(first.Equals(out));
4104 __ imulq(first.AsRegister<CpuRegister>(),
4105 Address(CpuRegister(RSP), second.GetStackIndex()));
4106 }
4107 break;
4108 }
4109
4110 case DataType::Type::kFloat32: {
4111 DCHECK(first.Equals(out));
4112 if (second.IsFpuRegister()) {
4113 __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4114 } else if (second.IsConstant()) {
4115 __ mulss(first.AsFpuRegister<XmmRegister>(),
4116 codegen_->LiteralFloatAddress(
4117 second.GetConstant()->AsFloatConstant()->GetValue()));
4118 } else {
4119 DCHECK(second.IsStackSlot());
4120 __ mulss(first.AsFpuRegister<XmmRegister>(),
4121 Address(CpuRegister(RSP), second.GetStackIndex()));
4122 }
4123 break;
4124 }
4125
4126 case DataType::Type::kFloat64: {
4127 DCHECK(first.Equals(out));
4128 if (second.IsFpuRegister()) {
4129 __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4130 } else if (second.IsConstant()) {
4131 __ mulsd(first.AsFpuRegister<XmmRegister>(),
4132 codegen_->LiteralDoubleAddress(
4133 second.GetConstant()->AsDoubleConstant()->GetValue()));
4134 } else {
4135 DCHECK(second.IsDoubleStackSlot());
4136 __ mulsd(first.AsFpuRegister<XmmRegister>(),
4137 Address(CpuRegister(RSP), second.GetStackIndex()));
4138 }
4139 break;
4140 }
4141
4142 default:
4143 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
4144 }
4145 }
4146
4147 void InstructionCodeGeneratorX86_64::PushOntoFPStack(Location source, uint32_t temp_offset,
4148 uint32_t stack_adjustment, bool is_float) {
4149 if (source.IsStackSlot()) {
4150 DCHECK(is_float);
4151 __ flds(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
4152 } else if (source.IsDoubleStackSlot()) {
4153 DCHECK(!is_float);
4154 __ fldl(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
4155 } else {
4156 // Write the value to the temporary location on the stack and load to FP stack.
4157 if (is_float) {
4158 Location stack_temp = Location::StackSlot(temp_offset);
4159 codegen_->Move(stack_temp, source);
4160 __ flds(Address(CpuRegister(RSP), temp_offset));
4161 } else {
4162 Location stack_temp = Location::DoubleStackSlot(temp_offset);
4163 codegen_->Move(stack_temp, source);
4164 __ fldl(Address(CpuRegister(RSP), temp_offset));
4165 }
4166 }
4167 }
4168
4169 void InstructionCodeGeneratorX86_64::GenerateRemFP(HRem *rem) {
4170 DataType::Type type = rem->GetResultType();
4171 bool is_float = type == DataType::Type::kFloat32;
4172 size_t elem_size = DataType::Size(type);
4173 LocationSummary* locations = rem->GetLocations();
4174 Location first = locations->InAt(0);
4175 Location second = locations->InAt(1);
4176 Location out = locations->Out();
4177
4178 // Create stack space for 2 elements.
4179 // TODO: enhance register allocator to ask for stack temporaries.
4180 __ subq(CpuRegister(RSP), Immediate(2 * elem_size));
4181
4182 // Load the values to the FP stack in reverse order, using temporaries if needed.
4183 PushOntoFPStack(second, elem_size, 2 * elem_size, is_float);
4184 PushOntoFPStack(first, 0, 2 * elem_size, is_float);
4185
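// Note: fprem computes the truncating (C-style) partial remainder, which matches the
// semantics of the Java % operator on floating-point values; fprem1 (the IEEE
// round-to-nearest remainder) would give different results for some inputs.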
4186 // Loop doing FPREM until we stabilize.
4187 NearLabel retry;
4188 __ Bind(&retry);
4189 __ fprem();
4190
4191 // Move FP status to AX.
4192 __ fstsw();
4193
4194 // And see if the argument reduction is complete. This is signaled by the
4195 // C2 FPU flag bit set to 0.
4196 __ andl(CpuRegister(RAX), Immediate(kC2ConditionMask));
4197 __ j(kNotEqual, &retry);
4198
4199 // We have settled on the final value. Retrieve it into an XMM register.
4200 // Store FP top of stack to real stack.
4201 if (is_float) {
4202 __ fsts(Address(CpuRegister(RSP), 0));
4203 } else {
4204 __ fstl(Address(CpuRegister(RSP), 0));
4205 }
4206
4207 // Pop the 2 items from the FP stack.
4208 __ fucompp();
4209
4210 // Load the value from the stack into an XMM register.
4211 DCHECK(out.IsFpuRegister()) << out;
4212 if (is_float) {
4213 __ movss(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
4214 } else {
4215 __ movsd(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
4216 }
4217
4218 // And remove the temporary stack space we allocated.
4219 __ addq(CpuRegister(RSP), Immediate(2 * elem_size));
4220 }
4221
4222 void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
4223 DCHECK(instruction->IsDiv() || instruction->IsRem());
4224
4225 LocationSummary* locations = instruction->GetLocations();
4226 Location second = locations->InAt(1);
4227 DCHECK(second.IsConstant());
4228
4229 CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
4230 CpuRegister input_register = locations->InAt(0).AsRegister<CpuRegister>();
4231 int64_t imm = Int64FromConstant(second.GetConstant());
4232
4233 DCHECK(imm == 1 || imm == -1);
4234
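// x % +/-1 is always 0, x / 1 is x, and x / -1 is -x (with the negation of the minimum
// value wrapping back to itself, which is the required two's-complement behavior).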
4235 switch (instruction->GetResultType()) {
4236 case DataType::Type::kInt32: {
4237 if (instruction->IsRem()) {
4238 __ xorl(output_register, output_register);
4239 } else {
4240 __ movl(output_register, input_register);
4241 if (imm == -1) {
4242 __ negl(output_register);
4243 }
4244 }
4245 break;
4246 }
4247
4248 case DataType::Type::kInt64: {
4249 if (instruction->IsRem()) {
4250 __ xorl(output_register, output_register);
4251 } else {
4252 __ movq(output_register, input_register);
4253 if (imm == -1) {
4254 __ negq(output_register);
4255 }
4256 }
4257 break;
4258 }
4259
4260 default:
4261 LOG(FATAL) << "Unexpected type for div by (-)1 " << instruction->GetResultType();
4262 }
4263 }
4264 void InstructionCodeGeneratorX86_64::RemByPowerOfTwo(HRem* instruction) {
4265 LocationSummary* locations = instruction->GetLocations();
4266 Location second = locations->InAt(1);
4267 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4268 CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
4269 int64_t imm = Int64FromConstant(second.GetConstant());
4270 DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
4271 uint64_t abs_imm = AbsOrMin(imm);
4272 CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
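// Remainder for a +/-2^k divisor: out = numerator & (2^k - 1); if the numerator is negative
// and the masked value is non-zero, subtract 2^k so that the result takes the sign of the
// numerator (e.g. -7 % 4 == -3, not 1).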
4273 if (instruction->GetResultType() == DataType::Type::kInt32) {
4274 NearLabel done;
4275 __ movl(out, numerator);
4276 __ andl(out, Immediate(abs_imm-1));
4277 __ j(Condition::kZero, &done);
4278 __ leal(tmp, Address(out, static_cast<int32_t>(~(abs_imm-1))));
4279 __ testl(numerator, numerator);
4280 __ cmov(Condition::kLess, out, tmp, false);
4281 __ Bind(&done);
4282
4283 } else {
4284 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
4285 codegen_->Load64BitValue(tmp, abs_imm - 1);
4286 NearLabel done;
4287
4288 __ movq(out, numerator);
4289 __ andq(out, tmp);
4290 __ j(Condition::kZero, &done);
4291 __ movq(tmp, numerator);
4292 __ sarq(tmp, Immediate(63));
4293 __ shlq(tmp, Immediate(WhichPowerOf2(abs_imm)));
4294 __ orq(out, tmp);
4295 __ Bind(&done);
4296 }
4297 }
4298 void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
4299 LocationSummary* locations = instruction->GetLocations();
4300 Location second = locations->InAt(1);
4301
4302 CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
4303 CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
4304
4305 int64_t imm = Int64FromConstant(second.GetConstant());
4306 DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
4307 uint64_t abs_imm = AbsOrMin(imm);
4308
4309 CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
4310
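// Signed division by +/-2^k rounding toward zero:
//   q = (n < 0 ? n + (2^k - 1) : n) >> k, negated afterwards if the divisor is negative.
// For example, -7 / 4 becomes (-7 + 3) >> 2 == -1, whereas a plain arithmetic shift
// would yield -2.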
4311 if (instruction->GetResultType() == DataType::Type::kInt32) {
4312 // When the denominator is equal to 2, we can add the sign bit and the numerator to tmp.
4313 // Below we use an addl instruction instead of cmov, which gives us a 1 cycle benefit.
4314 if (abs_imm == 2) {
4315 __ leal(tmp, Address(numerator, 0));
4316 __ shrl(tmp, Immediate(31));
4317 __ addl(tmp, numerator);
4318 } else {
4319 __ leal(tmp, Address(numerator, abs_imm - 1));
4320 __ testl(numerator, numerator);
4321 __ cmov(kGreaterEqual, tmp, numerator);
4322 }
4323 int shift = CTZ(imm);
4324 __ sarl(tmp, Immediate(shift));
4325
4326 if (imm < 0) {
4327 __ negl(tmp);
4328 }
4329
4330 __ movl(output_register, tmp);
4331 } else {
4332 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
4333 CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
4334 if (abs_imm == 2) {
4335 __ movq(rdx, numerator);
4336 __ shrq(rdx, Immediate(63));
4337 __ addq(rdx, numerator);
4338 } else {
4339 codegen_->Load64BitValue(rdx, abs_imm - 1);
4340 __ addq(rdx, numerator);
4341 __ testq(numerator, numerator);
4342 __ cmov(kGreaterEqual, rdx, numerator);
4343 }
4344 int shift = CTZ(imm);
4345 __ sarq(rdx, Immediate(shift));
4346
4347 if (imm < 0) {
4348 __ negq(rdx);
4349 }
4350
4351 __ movq(output_register, rdx);
4352 }
4353 }
4354
4355 void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
4356 DCHECK(instruction->IsDiv() || instruction->IsRem());
4357
4358 LocationSummary* locations = instruction->GetLocations();
4359 Location second = locations->InAt(1);
4360
4361 CpuRegister numerator = instruction->IsDiv() ? locations->GetTemp(1).AsRegister<CpuRegister>()
4362 : locations->GetTemp(0).AsRegister<CpuRegister>();
4363 CpuRegister eax = locations->InAt(0).AsRegister<CpuRegister>();
4364 CpuRegister edx = instruction->IsDiv() ? locations->GetTemp(0).AsRegister<CpuRegister>()
4365 : locations->Out().AsRegister<CpuRegister>();
4366 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4367
4368 DCHECK_EQ(RAX, eax.AsRegister());
4369 DCHECK_EQ(RDX, edx.AsRegister());
4370 if (instruction->IsDiv()) {
4371 DCHECK_EQ(RAX, out.AsRegister());
4372 } else {
4373 DCHECK_EQ(RDX, out.AsRegister());
4374 }
4375
4376 int64_t magic;
4377 int shift;
4378
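// This implements the standard "magic number" division (see Hacker's Delight, chapter 10):
// take the high half of magic * numerator, correct by +/-numerator when the signs of the
// magic constant and the divisor differ, shift right, then add the sign bit to round toward
// zero. For a remainder, compute numerator - quotient * imm at the end.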
4379 // TODO: can these branches be written as one?
4380 if (instruction->GetResultType() == DataType::Type::kInt32) {
4381 int imm = second.GetConstant()->AsIntConstant()->GetValue();
4382
4383 CalculateMagicAndShiftForDivRem(imm, false /* is_long= */, &magic, &shift);
4384
4385 __ movl(numerator, eax);
4386
4387 __ movl(eax, Immediate(magic));
4388 __ imull(numerator);
4389
4390 if (imm > 0 && magic < 0) {
4391 __ addl(edx, numerator);
4392 } else if (imm < 0 && magic > 0) {
4393 __ subl(edx, numerator);
4394 }
4395
4396 if (shift != 0) {
4397 __ sarl(edx, Immediate(shift));
4398 }
4399
4400 __ movl(eax, edx);
4401 __ shrl(edx, Immediate(31));
4402 __ addl(edx, eax);
4403
4404 if (instruction->IsRem()) {
4405 __ movl(eax, numerator);
4406 __ imull(edx, Immediate(imm));
4407 __ subl(eax, edx);
4408 __ movl(edx, eax);
4409 } else {
4410 __ movl(eax, edx);
4411 }
4412 } else {
4413 int64_t imm = second.GetConstant()->AsLongConstant()->GetValue();
4414
4415 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
4416
4417 CpuRegister rax = eax;
4418 CpuRegister rdx = edx;
4419
4420 CalculateMagicAndShiftForDivRem(imm, true /* is_long= */, &magic, &shift);
4421
4422 // Save the numerator.
4423 __ movq(numerator, rax);
4424
4425 // RAX = magic
4426 codegen_->Load64BitValue(rax, magic);
4427
4428 // RDX:RAX = magic * numerator
4429 __ imulq(numerator);
4430
4431 if (imm > 0 && magic < 0) {
4432 // RDX += numerator
4433 __ addq(rdx, numerator);
4434 } else if (imm < 0 && magic > 0) {
4435 // RDX -= numerator
4436 __ subq(rdx, numerator);
4437 }
4438
4439 // Shift if needed.
4440 if (shift != 0) {
4441 __ sarq(rdx, Immediate(shift));
4442 }
4443
4444 // RDX += 1 if RDX < 0
4445 __ movq(rax, rdx);
4446 __ shrq(rdx, Immediate(63));
4447 __ addq(rdx, rax);
4448
4449 if (instruction->IsRem()) {
4450 __ movq(rax, numerator);
4451
4452 if (IsInt<32>(imm)) {
4453 __ imulq(rdx, Immediate(static_cast<int32_t>(imm)));
4454 } else {
4455 __ imulq(rdx, codegen_->LiteralInt64Address(imm));
4456 }
4457
4458 __ subq(rax, rdx);
4459 __ movq(rdx, rax);
4460 } else {
4461 __ movq(rax, rdx);
4462 }
4463 }
4464 }
4465
4466 void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
4467 DCHECK(instruction->IsDiv() || instruction->IsRem());
4468 DataType::Type type = instruction->GetResultType();
4469 DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
4470
4471 bool is_div = instruction->IsDiv();
4472 LocationSummary* locations = instruction->GetLocations();
4473
4474 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4475 Location second = locations->InAt(1);
4476
4477 DCHECK_EQ(RAX, locations->InAt(0).AsRegister<CpuRegister>().AsRegister());
4478 DCHECK_EQ(is_div ? RAX : RDX, out.AsRegister());
4479
4480 if (second.IsConstant()) {
4481 int64_t imm = Int64FromConstant(second.GetConstant());
4482
4483 if (imm == 0) {
4484 // Do not generate anything. DivZeroCheck would prevent any code to be executed.
4485 } else if (imm == 1 || imm == -1) {
4486 DivRemOneOrMinusOne(instruction);
4487 } else if (IsPowerOfTwo(AbsOrMin(imm))) {
4488 if (is_div) {
4489 DivByPowerOfTwo(instruction->AsDiv());
4490 } else {
4491 RemByPowerOfTwo(instruction->AsRem());
4492 }
4493 } else {
4494 DCHECK(imm <= -2 || imm >= 2);
4495 GenerateDivRemWithAnyConstant(instruction);
4496 }
4497 } else {
4498 SlowPathCode* slow_path =
4499 new (codegen_->GetScopedAllocator()) DivRemMinusOneSlowPathX86_64(
4500 instruction, out.AsRegister(), type, is_div);
4501 codegen_->AddSlowPath(slow_path);
4502
4503 CpuRegister second_reg = second.AsRegister<CpuRegister>();
4504 // 0x80000000(00000000)/-1 triggers an arithmetic exception!
4505 // Dividing by -1 is actually negation and -0x80000000(00000000) = 0x80000000(00000000),
4506 // so it's safe to just use negl instead of more complex comparisons.
4507 if (type == DataType::Type::kInt32) {
4508 __ cmpl(second_reg, Immediate(-1));
4509 __ j(kEqual, slow_path->GetEntryLabel());
4510 // edx:eax <- sign extension of eax
4511 __ cdq();
4512 // eax = quotient, edx = remainder
4513 __ idivl(second_reg);
4514 } else {
4515 __ cmpq(second_reg, Immediate(-1));
4516 __ j(kEqual, slow_path->GetEntryLabel());
4517 // rdx:rax <- sign extension of rax
4518 __ cqo();
4519 // rax = quotient, rdx = remainder
4520 __ idivq(second_reg);
4521 }
4522 __ Bind(slow_path->GetExitLabel());
4523 }
4524 }
4525
4526 void LocationsBuilderX86_64::VisitDiv(HDiv* div) {
4527 LocationSummary* locations =
4528 new (GetGraph()->GetAllocator()) LocationSummary(div, LocationSummary::kNoCall);
4529 switch (div->GetResultType()) {
4530 case DataType::Type::kInt32:
4531 case DataType::Type::kInt64: {
4532 locations->SetInAt(0, Location::RegisterLocation(RAX));
4533 locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
4534 locations->SetOut(Location::SameAsFirstInput());
4535 // Intel uses edx:eax as the dividend.
4536 locations->AddTemp(Location::RegisterLocation(RDX));
4537 // We need to save the numerator while we tweak rax and rdx. As we are using imul in a way
4538 // which enforces results to be in RAX and RDX, things are simpler if we use RDX also as
4539 // output and request another temp.
4540 if (div->InputAt(1)->IsConstant()) {
4541 locations->AddTemp(Location::RequiresRegister());
4542 }
4543 break;
4544 }
4545
4546 case DataType::Type::kFloat32:
4547 case DataType::Type::kFloat64: {
4548 locations->SetInAt(0, Location::RequiresFpuRegister());
4549 locations->SetInAt(1, Location::Any());
4550 locations->SetOut(Location::SameAsFirstInput());
4551 break;
4552 }
4553
4554 default:
4555 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4556 }
4557 }
4558
4559 void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) {
4560 LocationSummary* locations = div->GetLocations();
4561 Location first = locations->InAt(0);
4562 Location second = locations->InAt(1);
4563 DCHECK(first.Equals(locations->Out()));
4564
4565 DataType::Type type = div->GetResultType();
4566 switch (type) {
4567 case DataType::Type::kInt32:
4568 case DataType::Type::kInt64: {
4569 GenerateDivRemIntegral(div);
4570 break;
4571 }
4572
4573 case DataType::Type::kFloat32: {
4574 if (second.IsFpuRegister()) {
4575 __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4576 } else if (second.IsConstant()) {
4577 __ divss(first.AsFpuRegister<XmmRegister>(),
4578 codegen_->LiteralFloatAddress(
4579 second.GetConstant()->AsFloatConstant()->GetValue()));
4580 } else {
4581 DCHECK(second.IsStackSlot());
4582 __ divss(first.AsFpuRegister<XmmRegister>(),
4583 Address(CpuRegister(RSP), second.GetStackIndex()));
4584 }
4585 break;
4586 }
4587
4588 case DataType::Type::kFloat64: {
4589 if (second.IsFpuRegister()) {
4590 __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4591 } else if (second.IsConstant()) {
4592 __ divsd(first.AsFpuRegister<XmmRegister>(),
4593 codegen_->LiteralDoubleAddress(
4594 second.GetConstant()->AsDoubleConstant()->GetValue()));
4595 } else {
4596 DCHECK(second.IsDoubleStackSlot());
4597 __ divsd(first.AsFpuRegister<XmmRegister>(),
4598 Address(CpuRegister(RSP), second.GetStackIndex()));
4599 }
4600 break;
4601 }
4602
4603 default:
4604 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4605 }
4606 }
4607
4608 void LocationsBuilderX86_64::VisitRem(HRem* rem) {
4609 DataType::Type type = rem->GetResultType();
4610 LocationSummary* locations =
4611 new (GetGraph()->GetAllocator()) LocationSummary(rem, LocationSummary::kNoCall);
4612
4613 switch (type) {
4614 case DataType::Type::kInt32:
4615 case DataType::Type::kInt64: {
4616 locations->SetInAt(0, Location::RegisterLocation(RAX));
4617 locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
4618 // Intel uses rdx:rax as the dividend and puts the remainder in rdx
4619 locations->SetOut(Location::RegisterLocation(RDX));
4620 // We need to save the numerator while we tweak eax and edx. As we are using imul in a way
4621 // which enforces results to be in RAX and RDX, things are simpler if we use EAX also as
4622 // output and request another temp.
4623 if (rem->InputAt(1)->IsConstant()) {
4624 locations->AddTemp(Location::RequiresRegister());
4625 }
4626 break;
4627 }
4628
4629 case DataType::Type::kFloat32:
4630 case DataType::Type::kFloat64: {
4631 locations->SetInAt(0, Location::Any());
4632 locations->SetInAt(1, Location::Any());
4633 locations->SetOut(Location::RequiresFpuRegister());
4634 locations->AddTemp(Location::RegisterLocation(RAX));
4635 break;
4636 }
4637
4638 default:
4639 LOG(FATAL) << "Unexpected rem type " << type;
4640 }
4641 }
4642
4643 void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) {
4644 DataType::Type type = rem->GetResultType();
4645 switch (type) {
4646 case DataType::Type::kInt32:
4647 case DataType::Type::kInt64: {
4648 GenerateDivRemIntegral(rem);
4649 break;
4650 }
4651 case DataType::Type::kFloat32:
4652 case DataType::Type::kFloat64: {
4653 GenerateRemFP(rem);
4654 break;
4655 }
4656 default:
4657 LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
4658 }
4659 }
4660
4661 static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
4662 LocationSummary* locations = new (allocator) LocationSummary(minmax);
4663 switch (minmax->GetResultType()) {
4664 case DataType::Type::kInt32:
4665 case DataType::Type::kInt64:
4666 locations->SetInAt(0, Location::RequiresRegister());
4667 locations->SetInAt(1, Location::RequiresRegister());
4668 locations->SetOut(Location::SameAsFirstInput());
4669 break;
4670 case DataType::Type::kFloat32:
4671 case DataType::Type::kFloat64:
4672 locations->SetInAt(0, Location::RequiresFpuRegister());
4673 locations->SetInAt(1, Location::RequiresFpuRegister());
4674 // The following is sub-optimal, but all we can do for now. It would be fine to also accept
4675 // the second input to be the output (we can simply swap inputs).
4676 locations->SetOut(Location::SameAsFirstInput());
4677 break;
4678 default:
4679 LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
4680 }
4681 }
4682
4683 void InstructionCodeGeneratorX86_64::GenerateMinMaxInt(LocationSummary* locations,
4684 bool is_min,
4685 DataType::Type type) {
4686 Location op1_loc = locations->InAt(0);
4687 Location op2_loc = locations->InAt(1);
4688
4689 // Shortcut for same input locations.
4690 if (op1_loc.Equals(op2_loc)) {
4691 // Can return immediately, as op1_loc == out_loc.
4692 // Note: if we ever support separate registers, e.g., output into memory, we need to check for
4693 // a copy here.
4694 DCHECK(locations->Out().Equals(op1_loc));
4695 return;
4696 }
4697
4698 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4699 CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();
4700
4701 // (out := op1)
4702 // out <=? op2
4703 // if out is min jmp done
4704 // out := op2
4705 // done:
4706
4707 if (type == DataType::Type::kInt64) {
4708 __ cmpq(out, op2);
4709 __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ true);
4710 } else {
4711 DCHECK_EQ(type, DataType::Type::kInt32);
4712 __ cmpl(out, op2);
4713 __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ false);
4714 }
4715 }
4716
4717 void InstructionCodeGeneratorX86_64::GenerateMinMaxFP(LocationSummary* locations,
4718 bool is_min,
4719 DataType::Type type) {
4720 Location op1_loc = locations->InAt(0);
4721 Location op2_loc = locations->InAt(1);
4722 Location out_loc = locations->Out();
4723 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
4724
4725 // Shortcut for same input locations.
4726 if (op1_loc.Equals(op2_loc)) {
4727 DCHECK(out_loc.Equals(op1_loc));
4728 return;
4729 }
4730
4731 // (out := op1)
4732 // out <=? op2
4733 // if Nan jmp Nan_label
4734 // if out is min jmp done
4735 // if op2 is min jmp op2_label
4736 // handle -0/+0
4737 // jmp done
4738 // Nan_label:
4739 // out := NaN
4740 // op2_label:
4741 // out := op2
4742 // done:
4743 //
4744 // This removes one jmp, but needs to copy one input (op1) to out.
4745 //
4746 // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?
4747
4748 XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
4749
4750 NearLabel nan, done, op2_label;
4751 if (type == DataType::Type::kFloat64) {
4752 __ ucomisd(out, op2);
4753 } else {
4754 DCHECK_EQ(type, DataType::Type::kFloat32);
4755 __ ucomiss(out, op2);
4756 }
4757
4758 __ j(Condition::kParityEven, &nan);
4759
4760 __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
4761 __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
4762
4763 // Handle 0.0/-0.0.
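// min(+0.0, -0.0) must be -0.0 and max(+0.0, -0.0) must be +0.0. The operands differ only
// in the sign bit, so OR-ing them yields -0.0 (for min) and AND-ing them yields +0.0 (for max).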
4764 if (is_min) {
4765 if (type == DataType::Type::kFloat64) {
4766 __ orpd(out, op2);
4767 } else {
4768 __ orps(out, op2);
4769 }
4770 } else {
4771 if (type == DataType::Type::kFloat64) {
4772 __ andpd(out, op2);
4773 } else {
4774 __ andps(out, op2);
4775 }
4776 }
4777 __ jmp(&done);
4778
4779 // NaN handling.
4780 __ Bind(&nan);
4781 if (type == DataType::Type::kFloat64) {
4782 __ movsd(out, codegen_->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
4783 } else {
4784 __ movss(out, codegen_->LiteralInt32Address(INT32_C(0x7FC00000)));
4785 }
4786 __ jmp(&done);
4787
4788 // out := op2;
4789 __ Bind(&op2_label);
4790 if (type == DataType::Type::kFloat64) {
4791 __ movsd(out, op2);
4792 } else {
4793 __ movss(out, op2);
4794 }
4795
4796 // Done.
4797 __ Bind(&done);
4798 }
4799
4800 void InstructionCodeGeneratorX86_64::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
4801 DataType::Type type = minmax->GetResultType();
4802 switch (type) {
4803 case DataType::Type::kInt32:
4804 case DataType::Type::kInt64:
4805 GenerateMinMaxInt(minmax->GetLocations(), is_min, type);
4806 break;
4807 case DataType::Type::kFloat32:
4808 case DataType::Type::kFloat64:
4809 GenerateMinMaxFP(minmax->GetLocations(), is_min, type);
4810 break;
4811 default:
4812 LOG(FATAL) << "Unexpected type for HMinMax " << type;
4813 }
4814 }
4815
4816 void LocationsBuilderX86_64::VisitMin(HMin* min) {
4817 CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
4818 }
4819
4820 void InstructionCodeGeneratorX86_64::VisitMin(HMin* min) {
4821 GenerateMinMax(min, /*is_min*/ true);
4822 }
4823
4824 void LocationsBuilderX86_64::VisitMax(HMax* max) {
4825 CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
4826 }
4827
4828 void InstructionCodeGeneratorX86_64::VisitMax(HMax* max) {
4829 GenerateMinMax(max, /*is_min*/ false);
4830 }
4831
4832 void LocationsBuilderX86_64::VisitAbs(HAbs* abs) {
4833 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
4834 switch (abs->GetResultType()) {
4835 case DataType::Type::kInt32:
4836 case DataType::Type::kInt64:
4837 locations->SetInAt(0, Location::RequiresRegister());
4838 locations->SetOut(Location::SameAsFirstInput());
4839 locations->AddTemp(Location::RequiresRegister());
4840 break;
4841 case DataType::Type::kFloat32:
4842 case DataType::Type::kFloat64:
4843 locations->SetInAt(0, Location::RequiresFpuRegister());
4844 locations->SetOut(Location::SameAsFirstInput());
4845 locations->AddTemp(Location::RequiresFpuRegister());
4846 break;
4847 default:
4848 LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4849 }
4850 }
4851
4852 void InstructionCodeGeneratorX86_64::VisitAbs(HAbs* abs) {
4853 LocationSummary* locations = abs->GetLocations();
4854 switch (abs->GetResultType()) {
4855 case DataType::Type::kInt32: {
4856 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4857 CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();
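// Branch-free absolute value: with mask = x >> 31 (all ones if x is negative, zero
// otherwise), abs(x) = (x + mask) ^ mask. Note that the minimum value maps to itself,
// matching the Java Math.abs semantics.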
4858 // Create mask.
4859 __ movl(mask, out);
4860 __ sarl(mask, Immediate(31));
4861 // Add mask.
4862 __ addl(out, mask);
4863 __ xorl(out, mask);
4864 break;
4865 }
4866 case DataType::Type::kInt64: {
4867 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4868 CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();
4869 // Create mask.
4870 __ movq(mask, out);
4871 __ sarq(mask, Immediate(63));
4872 // Add mask.
4873 __ addq(out, mask);
4874 __ xorq(out, mask);
4875 break;
4876 }
4877 case DataType::Type::kFloat32: {
4878 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4879 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4880 __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
4881 __ andps(out, mask);
4882 break;
4883 }
4884 case DataType::Type::kFloat64: {
4885 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4886 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4887 __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
4888 __ andpd(out, mask);
4889 break;
4890 }
4891 default:
4892 LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4893 }
4894 }
4895
4896 void LocationsBuilderX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4897 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
4898 locations->SetInAt(0, Location::Any());
4899 }
4900
4901 void InstructionCodeGeneratorX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4902 SlowPathCode* slow_path =
4903 new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathX86_64(instruction);
4904 codegen_->AddSlowPath(slow_path);
4905
4906 LocationSummary* locations = instruction->GetLocations();
4907 Location value = locations->InAt(0);
4908
4909 switch (instruction->GetType()) {
4910 case DataType::Type::kBool:
4911 case DataType::Type::kUint8:
4912 case DataType::Type::kInt8:
4913 case DataType::Type::kUint16:
4914 case DataType::Type::kInt16:
4915 case DataType::Type::kInt32: {
4916 if (value.IsRegister()) {
4917 __ testl(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
4918 __ j(kEqual, slow_path->GetEntryLabel());
4919 } else if (value.IsStackSlot()) {
4920 __ cmpl(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
4921 __ j(kEqual, slow_path->GetEntryLabel());
4922 } else {
4923 DCHECK(value.IsConstant()) << value;
4924 if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
4925 __ jmp(slow_path->GetEntryLabel());
4926 }
4927 }
4928 break;
4929 }
4930 case DataType::Type::kInt64: {
4931 if (value.IsRegister()) {
4932 __ testq(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
4933 __ j(kEqual, slow_path->GetEntryLabel());
4934 } else if (value.IsDoubleStackSlot()) {
4935 __ cmpq(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
4936 __ j(kEqual, slow_path->GetEntryLabel());
4937 } else {
4938 DCHECK(value.IsConstant()) << value;
4939 if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
4940 __ jmp(slow_path->GetEntryLabel());
4941 }
4942 }
4943 break;
4944 }
4945 default:
4946 LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
4947 }
4948 }
4949
4950 void LocationsBuilderX86_64::HandleShift(HBinaryOperation* op) {
4951 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4952
4953 LocationSummary* locations =
4954 new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall);
4955
4956 switch (op->GetResultType()) {
4957 case DataType::Type::kInt32:
4958 case DataType::Type::kInt64: {
4959 locations->SetInAt(0, Location::RequiresRegister());
4960 // The shift count needs to be in CL.
4961 locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, op->InputAt(1)));
4962 locations->SetOut(Location::SameAsFirstInput());
4963 break;
4964 }
4965 default:
4966 LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
4967 }
4968 }
4969
4970 void InstructionCodeGeneratorX86_64::HandleShift(HBinaryOperation* op) {
4971 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4972
4973 LocationSummary* locations = op->GetLocations();
4974 CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
4975 Location second = locations->InAt(1);
4976
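// Constant shift counts are masked to the operand width (& 31 for int, & 63 for long),
// which matches both the Java shift semantics and what the hardware does for variable
// shift counts held in CL.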
4977 switch (op->GetResultType()) {
4978 case DataType::Type::kInt32: {
4979 if (second.IsRegister()) {
4980 CpuRegister second_reg = second.AsRegister<CpuRegister>();
4981 if (op->IsShl()) {
4982 __ shll(first_reg, second_reg);
4983 } else if (op->IsShr()) {
4984 __ sarl(first_reg, second_reg);
4985 } else {
4986 __ shrl(first_reg, second_reg);
4987 }
4988 } else {
4989 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
4990 if (op->IsShl()) {
4991 __ shll(first_reg, imm);
4992 } else if (op->IsShr()) {
4993 __ sarl(first_reg, imm);
4994 } else {
4995 __ shrl(first_reg, imm);
4996 }
4997 }
4998 break;
4999 }
5000 case DataType::Type::kInt64: {
5001 if (second.IsRegister()) {
5002 CpuRegister second_reg = second.AsRegister<CpuRegister>();
5003 if (op->IsShl()) {
5004 __ shlq(first_reg, second_reg);
5005 } else if (op->IsShr()) {
5006 __ sarq(first_reg, second_reg);
5007 } else {
5008 __ shrq(first_reg, second_reg);
5009 }
5010 } else {
5011 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
5012 if (op->IsShl()) {
5013 __ shlq(first_reg, imm);
5014 } else if (op->IsShr()) {
5015 __ sarq(first_reg, imm);
5016 } else {
5017 __ shrq(first_reg, imm);
5018 }
5019 }
5020 break;
5021 }
5022 default:
5023 LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
5024 UNREACHABLE();
5025 }
5026 }
5027
5028 void LocationsBuilderX86_64::VisitRor(HRor* ror) {
5029 LocationSummary* locations =
5030 new (GetGraph()->GetAllocator()) LocationSummary(ror, LocationSummary::kNoCall);
5031
5032 switch (ror->GetResultType()) {
5033 case DataType::Type::kInt32:
5034 case DataType::Type::kInt64: {
5035 locations->SetInAt(0, Location::RequiresRegister());
5036 // The shift count needs to be in CL (unless it is a constant).
5037 locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, ror->InputAt(1)));
5038 locations->SetOut(Location::SameAsFirstInput());
5039 break;
5040 }
5041 default:
5042 LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
5043 UNREACHABLE();
5044 }
5045 }
5046
5047 void InstructionCodeGeneratorX86_64::VisitRor(HRor* ror) {
5048 LocationSummary* locations = ror->GetLocations();
5049 CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
5050 Location second = locations->InAt(1);
5051
5052 switch (ror->GetResultType()) {
5053 case DataType::Type::kInt32:
5054 if (second.IsRegister()) {
5055 CpuRegister second_reg = second.AsRegister<CpuRegister>();
5056 __ rorl(first_reg, second_reg);
5057 } else {
5058 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
5059 __ rorl(first_reg, imm);
5060 }
5061 break;
5062 case DataType::Type::kInt64:
5063 if (second.IsRegister()) {
5064 CpuRegister second_reg = second.AsRegister<CpuRegister>();
5065 __ rorq(first_reg, second_reg);
5066 } else {
5067 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
5068 __ rorq(first_reg, imm);
5069 }
5070 break;
5071 default:
5072 LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
5073 UNREACHABLE();
5074 }
5075 }
5076
5077 void LocationsBuilderX86_64::VisitShl(HShl* shl) {
5078 HandleShift(shl);
5079 }
5080
5081 void InstructionCodeGeneratorX86_64::VisitShl(HShl* shl) {
5082 HandleShift(shl);
5083 }
5084
5085 void LocationsBuilderX86_64::VisitShr(HShr* shr) {
5086 HandleShift(shr);
5087 }
5088
5089 void InstructionCodeGeneratorX86_64::VisitShr(HShr* shr) {
5090 HandleShift(shr);
5091 }
5092
5093 void LocationsBuilderX86_64::VisitUShr(HUShr* ushr) {
5094 HandleShift(ushr);
5095 }
5096
5097 void InstructionCodeGeneratorX86_64::VisitUShr(HUShr* ushr) {
5098 HandleShift(ushr);
5099 }
5100
5101 void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
5102 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5103 instruction, LocationSummary::kCallOnMainOnly);
5104 InvokeRuntimeCallingConvention calling_convention;
5105 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
5106 locations->SetOut(Location::RegisterLocation(RAX));
5107 }
5108
5109 void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) {
5110 codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
5111 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
5112 DCHECK(!codegen_->IsLeafMethod());
5113 }
5114
5115 void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) {
5116 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5117 instruction, LocationSummary::kCallOnMainOnly);
5118 InvokeRuntimeCallingConvention calling_convention;
5119 locations->SetOut(Location::RegisterLocation(RAX));
5120 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
5121 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
5122 }
5123
5124 void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) {
5125 // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
5126 QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
5127 codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
5128 CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
5129 DCHECK(!codegen_->IsLeafMethod());
5130 }
5131
5132 void LocationsBuilderX86_64::VisitParameterValue(HParameterValue* instruction) {
5133 LocationSummary* locations =
5134 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5135 Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
5136 if (location.IsStackSlot()) {
5137 location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5138 } else if (location.IsDoubleStackSlot()) {
5139 location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5140 }
5141 locations->SetOut(location);
5142 }
5143
5144 void InstructionCodeGeneratorX86_64::VisitParameterValue(
5145 [[maybe_unused]] HParameterValue* instruction) {
5146 // Nothing to do, the parameter is already at its location.
5147 }
5148
5149 void LocationsBuilderX86_64::VisitCurrentMethod(HCurrentMethod* instruction) {
5150 LocationSummary* locations =
5151 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5152 locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument));
5153 }
5154
5155 void InstructionCodeGeneratorX86_64::VisitCurrentMethod(
5156 [[maybe_unused]] HCurrentMethod* instruction) {
5157 // Nothing to do, the method is already at its location.
5158 }
5159
5160 void LocationsBuilderX86_64::VisitClassTableGet(HClassTableGet* instruction) {
5161 LocationSummary* locations =
5162 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5163 locations->SetInAt(0, Location::RequiresRegister());
5164 locations->SetOut(Location::RequiresRegister());
5165 }
5166
5167 void InstructionCodeGeneratorX86_64::VisitClassTableGet(HClassTableGet* instruction) {
5168 LocationSummary* locations = instruction->GetLocations();
5169 if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
5170 uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
5171 instruction->GetIndex(), kX86_64PointerSize).SizeValue();
5172 __ movq(locations->Out().AsRegister<CpuRegister>(),
5173 Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset));
5174 } else {
5175 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
5176 instruction->GetIndex(), kX86_64PointerSize));
5177 __ movq(locations->Out().AsRegister<CpuRegister>(),
5178 Address(locations->InAt(0).AsRegister<CpuRegister>(),
5179 mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
5180 __ movq(locations->Out().AsRegister<CpuRegister>(),
5181 Address(locations->Out().AsRegister<CpuRegister>(), method_offset));
5182 }
5183 }
5184
5185 void LocationsBuilderX86_64::VisitNot(HNot* not_) {
5186 LocationSummary* locations =
5187 new (GetGraph()->GetAllocator()) LocationSummary(not_, LocationSummary::kNoCall);
5188 locations->SetInAt(0, Location::RequiresRegister());
5189 locations->SetOut(Location::SameAsFirstInput());
5190 }
5191
5192 void InstructionCodeGeneratorX86_64::VisitNot(HNot* not_) {
5193 LocationSummary* locations = not_->GetLocations();
5194 DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
5195 locations->Out().AsRegister<CpuRegister>().AsRegister());
5196 Location out = locations->Out();
5197 switch (not_->GetResultType()) {
5198 case DataType::Type::kInt32:
5199 __ notl(out.AsRegister<CpuRegister>());
5200 break;
5201
5202 case DataType::Type::kInt64:
5203 __ notq(out.AsRegister<CpuRegister>());
5204 break;
5205
5206 default:
5207 LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
5208 }
5209 }
5210
5211 void LocationsBuilderX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
5212 LocationSummary* locations =
5213 new (GetGraph()->GetAllocator()) LocationSummary(bool_not, LocationSummary::kNoCall);
5214 locations->SetInAt(0, Location::RequiresRegister());
5215 locations->SetOut(Location::SameAsFirstInput());
5216 }
5217
5218 void InstructionCodeGeneratorX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
5219 LocationSummary* locations = bool_not->GetLocations();
5220 DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
5221 locations->Out().AsRegister<CpuRegister>().AsRegister());
5222 Location out = locations->Out();
5223 __ xorl(out.AsRegister<CpuRegister>(), Immediate(1));
5224 }
5225
5226 void LocationsBuilderX86_64::VisitPhi(HPhi* instruction) {
5227 LocationSummary* locations =
5228 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5229 for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
5230 locations->SetInAt(i, Location::Any());
5231 }
5232 locations->SetOut(Location::Any());
5233 }
5234
5235 void InstructionCodeGeneratorX86_64::VisitPhi([[maybe_unused]] HPhi* instruction) {
5236 LOG(FATAL) << "Unimplemented";
5237 }
5238
5239 void CodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) {
5240 /*
5241 * According to the JSR-133 Cookbook, for x86-64 only StoreLoad/AnyAny barriers need a memory fence.
5242 * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86-64 memory model.
5243 * For those cases, all we need to ensure is that there is a scheduling barrier in place.
5244 */
5245 switch (kind) {
5246 case MemBarrierKind::kAnyAny: {
5247 MemoryFence();
5248 break;
5249 }
5250 case MemBarrierKind::kAnyStore:
5251 case MemBarrierKind::kLoadAny:
5252 case MemBarrierKind::kStoreStore: {
5253 // nop
5254 break;
5255 }
5256 case MemBarrierKind::kNTStoreStore:
5257 // Non-Temporal Store/Store needs an explicit fence.
5258 MemoryFence(/* non-temporal= */ true);
5259 break;
5260 }
5261 }
5262
5263 void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) {
5264 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
5265
5266 bool object_field_get_with_read_barrier =
5267 (instruction->GetType() == DataType::Type::kReference) && codegen_->EmitReadBarrier();
5268 LocationSummary* locations =
5269 new (GetGraph()->GetAllocator()) LocationSummary(instruction,
5270 object_field_get_with_read_barrier
5271 ? LocationSummary::kCallOnSlowPath
5272 : LocationSummary::kNoCall);
5273 if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
5274 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
5275 }
5276 // receiver_input
5277 locations->SetInAt(0, Location::RequiresRegister());
5278 if (DataType::IsFloatingPointType(instruction->GetType())) {
5279 locations->SetOut(Location::RequiresFpuRegister());
5280 } else {
5281 // The output overlaps for an object field get when read barriers are
5282 // enabled: we do not want the move to overwrite the object's location, as
5283 // we need it to emit the read barrier. For predicated instructions we can
5284 // always overlap since the output is SameAsFirst and the default value.
5285 locations->SetOut(
5286 Location::RequiresRegister(),
5287 object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
5288 }
5289 }
5290
5291 void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
5292 const FieldInfo& field_info) {
5293 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
5294
5295 LocationSummary* locations = instruction->GetLocations();
5296 Location base_loc = locations->InAt(0);
5297 CpuRegister base = base_loc.AsRegister<CpuRegister>();
5298 Location out = locations->Out();
5299 bool is_volatile = field_info.IsVolatile();
5300 DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
5301 DataType::Type load_type = instruction->GetType();
5302 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
5303
5304 if (load_type == DataType::Type::kReference) {
5305 // /* HeapReference<Object> */ out = *(base + offset)
5306 if (codegen_->EmitBakerReadBarrier()) {
5307 // Note that a potential implicit null check is handled in this
5308 // CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call.
5309 codegen_->GenerateFieldLoadWithBakerReadBarrier(
5310 instruction, out, base, offset, /* needs_null_check= */ true);
5311 if (is_volatile) {
5312 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5313 }
5314 } else {
5315 __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
5316 codegen_->MaybeRecordImplicitNullCheck(instruction);
5317 if (is_volatile) {
5318 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5319 }
5320 // If read barriers are enabled, emit read barriers other than
5321 // Baker's using a slow path (and also unpoison the loaded
5322 // reference, if heap poisoning is enabled).
5323 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
5324 }
5325 } else {
5326 codegen_->LoadFromMemoryNoReference(load_type, out, Address(base, offset));
5327 codegen_->MaybeRecordImplicitNullCheck(instruction);
5328 if (is_volatile) {
5329 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5330 }
5331 }
5332 }
5333
5334 void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction,
5335 const FieldInfo& field_info,
5336 WriteBarrierKind write_barrier_kind) {
5337 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
5338
5339 LocationSummary* locations =
5340 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5341 DataType::Type field_type = field_info.GetFieldType();
5342 bool is_volatile = field_info.IsVolatile();
5343 bool needs_write_barrier =
5344 codegen_->StoreNeedsWriteBarrier(field_type, instruction->InputAt(1), write_barrier_kind);
5345 bool check_gc_card =
5346 codegen_->ShouldCheckGCCard(field_type, instruction->InputAt(1), write_barrier_kind);
5347
5348 locations->SetInAt(0, Location::RequiresRegister());
5349 if (DataType::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
5350 if (is_volatile) {
5351 // In order to satisfy the semantics of volatile, this must be a single instruction store.
5352 locations->SetInAt(1, Location::FpuRegisterOrInt32Constant(instruction->InputAt(1)));
5353 } else {
5354 locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
5355 }
5356 } else {
5357 if (is_volatile) {
5358 // In order to satisfy the semantics of volatile, this must be a single instruction store.
5359 locations->SetInAt(1, Location::RegisterOrInt32Constant(instruction->InputAt(1)));
5360 } else {
5361 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5362 }
5363 }
5364
5365 // TODO(solanes): We could reduce the temp usage but it requires some non-trivial refactoring of
5366 // InstructionCodeGeneratorX86_64::HandleFieldSet, GenerateVarHandleSet due to `extra_temp_index`.
5367 if (needs_write_barrier ||
5368 check_gc_card ||
5369 (kPoisonHeapReferences && field_type == DataType::Type::kReference)) {
5370 // Temporary registers for the write barrier.
5371 locations->AddTemp(Location::RequiresRegister());
5372 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
5373 }
5374 }
5375
5376 void InstructionCodeGeneratorX86_64::Bswap(Location value,
5377 DataType::Type type,
5378 CpuRegister* temp) {
5379 switch (type) {
5380 case DataType::Type::kInt16:
5381 // This should sign-extend, even if reimplemented with an XCHG of 8-bit registers.
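// For example, an int16 value 0x1234 held sign-extended in a 32-bit register becomes
// 0x34120000 after bswapl, and sarl(16) turns it into the sign-extended result 0x3412.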
5382 __ bswapl(value.AsRegister<CpuRegister>());
5383 __ sarl(value.AsRegister<CpuRegister>(), Immediate(16));
5384 break;
5385 case DataType::Type::kUint16:
5386 // TODO: Can be done with an XCHG of 8-bit registers. This is straight from Quick.
5387 __ bswapl(value.AsRegister<CpuRegister>());
5388 __ shrl(value.AsRegister<CpuRegister>(), Immediate(16));
5389 break;
5390 case DataType::Type::kInt32:
5391 case DataType::Type::kUint32:
5392 __ bswapl(value.AsRegister<CpuRegister>());
5393 break;
5394 case DataType::Type::kInt64:
5395 case DataType::Type::kUint64:
5396 __ bswapq(value.AsRegister<CpuRegister>());
5397 break;
5398 case DataType::Type::kFloat32: {
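// bswap only operates on general-purpose registers, so the floating-point value is
// bounced through the GPR temp for the swap.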
5399 DCHECK_NE(temp, nullptr);
5400 __ movd(*temp, value.AsFpuRegister<XmmRegister>(), /*is64bit=*/ false);
5401 __ bswapl(*temp);
5402 __ movd(value.AsFpuRegister<XmmRegister>(), *temp, /*is64bit=*/ false);
5403 break;
5404 }
5405 case DataType::Type::kFloat64: {
5406 DCHECK_NE(temp, nullptr);
5407 __ movd(*temp, value.AsFpuRegister<XmmRegister>(), /*is64bit=*/ true);
5408 __ bswapq(*temp);
5409 __ movd(value.AsFpuRegister<XmmRegister>(), *temp, /*is64bit=*/ true);
5410 break;
5411 }
5412 default:
5413 LOG(FATAL) << "Unexpected type for reverse-bytes: " << type;
5414 UNREACHABLE();
5415 }
5416 }
5417
5418 void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
5419 uint32_t value_index,
5420 uint32_t extra_temp_index,
5421 DataType::Type field_type,
5422 Address field_addr,
5423 CpuRegister base,
5424 bool is_volatile,
5425 bool is_atomic,
5426 bool value_can_be_null,
5427 bool byte_swap,
5428 WriteBarrierKind write_barrier_kind) {
5429 LocationSummary* locations = instruction->GetLocations();
5430 Location value = locations->InAt(value_index);
5431
5432 if (is_volatile) {
5433 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
5434 }
5435
5436 bool maybe_record_implicit_null_check_done = false;
5437
5438 if (value.IsConstant()) {
5439 switch (field_type) {
5440 case DataType::Type::kBool:
5441 case DataType::Type::kUint8:
5442 case DataType::Type::kInt8:
5443 __ movb(field_addr, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
5444 break;
5445 case DataType::Type::kUint16:
5446 case DataType::Type::kInt16: {
5447 int16_t v = CodeGenerator::GetInt16ValueOf(value.GetConstant());
5448 if (byte_swap) {
5449 v = BSWAP(v);
5450 }
5451 __ movw(field_addr, Immediate(v));
5452 break;
5453 }
5454 case DataType::Type::kUint32:
5455 case DataType::Type::kInt32:
5456 case DataType::Type::kFloat32:
5457 case DataType::Type::kReference: {
5458 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
5459 if (byte_swap) {
5460 v = BSWAP(v);
5461 }
5462 DCHECK_IMPLIES(field_type == DataType::Type::kReference, v == 0);
5463 // Note: if heap poisoning is enabled, no need to poison
5464 // (negate) `v` if it is a reference, as it would be null.
5465 __ movl(field_addr, Immediate(v));
5466 break;
5467 }
5468 case DataType::Type::kUint64:
5469 case DataType::Type::kInt64:
5470 case DataType::Type::kFloat64: {
5471 int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
5472 if (byte_swap) {
5473 v = BSWAP(v);
5474 }
5475 if (is_atomic) {
5476 // Move constant into a register, then atomically store the register to memory.
5477 CpuRegister temp = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5478 __ movq(temp, Immediate(v));
5479 __ movq(field_addr, temp);
5480 } else {
5481 Address field_addr2 = Address::displace(field_addr, sizeof(int32_t));
5482 codegen_->MoveInt64ToAddress(field_addr, field_addr2, v, instruction);
5483 }
5484 maybe_record_implicit_null_check_done = true;
5485 break;
5486 }
5487 case DataType::Type::kVoid:
5488 LOG(FATAL) << "Unreachable type " << field_type;
5489 UNREACHABLE();
5490 }
5491 } else {
5492 if (byte_swap) {
5493 // Swap byte order in-place in the input register (we will restore it later).
5494 CpuRegister temp = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5495 Bswap(value, field_type, &temp);
5496 }
5497
5498 switch (field_type) {
5499 case DataType::Type::kBool:
5500 case DataType::Type::kUint8:
5501 case DataType::Type::kInt8:
5502 __ movb(field_addr, value.AsRegister<CpuRegister>());
5503 break;
5504 case DataType::Type::kUint16:
5505 case DataType::Type::kInt16:
5506 __ movw(field_addr, value.AsRegister<CpuRegister>());
5507 break;
5508 case DataType::Type::kUint32:
5509 case DataType::Type::kInt32:
5510 case DataType::Type::kReference:
5511 if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
5512 CpuRegister temp = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5513 __ movl(temp, value.AsRegister<CpuRegister>());
5514 __ PoisonHeapReference(temp);
5515 __ movl(field_addr, temp);
5516 } else {
5517 __ movl(field_addr, value.AsRegister<CpuRegister>());
5518 }
5519 break;
5520 case DataType::Type::kUint64:
5521 case DataType::Type::kInt64:
5522 __ movq(field_addr, value.AsRegister<CpuRegister>());
5523 break;
5524 case DataType::Type::kFloat32:
5525 __ movss(field_addr, value.AsFpuRegister<XmmRegister>());
5526 break;
5527 case DataType::Type::kFloat64:
5528 __ movsd(field_addr, value.AsFpuRegister<XmmRegister>());
5529 break;
5530 case DataType::Type::kVoid:
5531 LOG(FATAL) << "Unreachable type " << field_type;
5532 UNREACHABLE();
5533 }
5534
5535 if (byte_swap) {
5536 // Restore byte order.
5537 CpuRegister temp = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5538 Bswap(value, field_type, &temp);
5539 }
5540 }
5541
5542 if (!maybe_record_implicit_null_check_done) {
5543 codegen_->MaybeRecordImplicitNullCheck(instruction);
5544 }
5545
5546 bool needs_write_barrier =
5547 codegen_->StoreNeedsWriteBarrier(field_type, instruction->InputAt(1), write_barrier_kind);
5548 if (needs_write_barrier) {
5549 if (value.IsConstant()) {
5550 DCHECK(value.GetConstant()->IsNullConstant());
5551 if (write_barrier_kind == WriteBarrierKind::kEmitBeingReliedOn) {
5552 DCHECK_NE(extra_temp_index, 0u);
5553 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
5554 CpuRegister card = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5555 codegen_->MarkGCCard(temp, card, base);
5556 }
5557 } else {
5558 DCHECK_NE(extra_temp_index, 0u);
5559 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
5560 CpuRegister card = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5561 codegen_->MaybeMarkGCCard(
5562 temp,
5563 card,
5564 base,
5565 value.AsRegister<CpuRegister>(),
5566 value_can_be_null && write_barrier_kind == WriteBarrierKind::kEmitNotBeingReliedOn);
5567 }
5568 } else if (codegen_->ShouldCheckGCCard(
5569 field_type, instruction->InputAt(value_index), write_barrier_kind)) {
5570 DCHECK_NE(extra_temp_index, 0u);
5571 DCHECK(value.IsRegister());
5572 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
5573 CpuRegister card = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5574 codegen_->CheckGCCardIsValid(temp, card, base);
5575 }
5576
5577 if (is_volatile) {
5578 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
5579 }
5580 }
5581
5582 void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
5583 const FieldInfo& field_info,
5584 bool value_can_be_null,
5585 WriteBarrierKind write_barrier_kind) {
5586 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
5587
5588 LocationSummary* locations = instruction->GetLocations();
5589 CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>();
5590 bool is_volatile = field_info.IsVolatile();
5591 DataType::Type field_type = field_info.GetFieldType();
5592 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
5593
5594 HandleFieldSet(instruction,
5595 /*value_index=*/ 1,
5596 /*extra_temp_index=*/ 1,
5597 field_type,
5598 Address(base, offset),
5599 base,
5600 is_volatile,
5601 /*is_atomic=*/ false,
5602 value_can_be_null,
5603 /*byte_swap=*/ false,
5604 write_barrier_kind);
5605 }
5606
5607 void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
5608 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind());
5609 }
5610
5611 void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
5612 HandleFieldSet(instruction,
5613 instruction->GetFieldInfo(),
5614 instruction->GetValueCanBeNull(),
5615 instruction->GetWriteBarrierKind());
5616 }
5617
5618 void LocationsBuilderX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
5619 HandleFieldGet(instruction);
5620 }
5621
5622 void InstructionCodeGeneratorX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
5623 HandleFieldGet(instruction, instruction->GetFieldInfo());
5624 }
5625
5626 void LocationsBuilderX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5627 HandleFieldGet(instruction);
5628 }
5629
5630 void InstructionCodeGeneratorX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5631 HandleFieldGet(instruction, instruction->GetFieldInfo());
5632 }
5633
5634 void LocationsBuilderX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5635 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind());
5636 }
5637
5638 void InstructionCodeGeneratorX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5639 HandleFieldSet(instruction,
5640 instruction->GetFieldInfo(),
5641 instruction->GetValueCanBeNull(),
5642 instruction->GetWriteBarrierKind());
5643 }
5644
5645 void LocationsBuilderX86_64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
5646 codegen_->CreateStringBuilderAppendLocations(instruction, Location::RegisterLocation(RAX));
5647 }
5648
5649 void InstructionCodeGeneratorX86_64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
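  // Load the append format flags into RDI (the first argument register) before calling the
  // StringBuilder append runtime entrypoint.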
5650 __ movl(CpuRegister(RDI), Immediate(instruction->GetFormat()->GetValue()));
5651 codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc());
5652 }
5653
5654 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldGet(
5655 HUnresolvedInstanceFieldGet* instruction) {
5656 FieldAccessCallingConventionX86_64 calling_convention;
5657 codegen_->CreateUnresolvedFieldLocationSummary(
5658 instruction, instruction->GetFieldType(), calling_convention);
5659 }
5660
5661 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldGet(
5662 HUnresolvedInstanceFieldGet* instruction) {
5663 FieldAccessCallingConventionX86_64 calling_convention;
5664 codegen_->GenerateUnresolvedFieldAccess(instruction,
5665 instruction->GetFieldType(),
5666 instruction->GetFieldIndex(),
5667 instruction->GetDexPc(),
5668 calling_convention);
5669 }
5670
5671 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldSet(
5672 HUnresolvedInstanceFieldSet* instruction) {
5673 FieldAccessCallingConventionX86_64 calling_convention;
5674 codegen_->CreateUnresolvedFieldLocationSummary(
5675 instruction, instruction->GetFieldType(), calling_convention);
5676 }
5677
5678 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldSet(
5679 HUnresolvedInstanceFieldSet* instruction) {
5680 FieldAccessCallingConventionX86_64 calling_convention;
5681 codegen_->GenerateUnresolvedFieldAccess(instruction,
5682 instruction->GetFieldType(),
5683 instruction->GetFieldIndex(),
5684 instruction->GetDexPc(),
5685 calling_convention);
5686 }
5687
5688 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldGet(
5689 HUnresolvedStaticFieldGet* instruction) {
5690 FieldAccessCallingConventionX86_64 calling_convention;
5691 codegen_->CreateUnresolvedFieldLocationSummary(
5692 instruction, instruction->GetFieldType(), calling_convention);
5693 }
5694
5695 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldGet(
5696 HUnresolvedStaticFieldGet* instruction) {
5697 FieldAccessCallingConventionX86_64 calling_convention;
5698 codegen_->GenerateUnresolvedFieldAccess(instruction,
5699 instruction->GetFieldType(),
5700 instruction->GetFieldIndex(),
5701 instruction->GetDexPc(),
5702 calling_convention);
5703 }
5704
5705 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldSet(
5706 HUnresolvedStaticFieldSet* instruction) {
5707 FieldAccessCallingConventionX86_64 calling_convention;
5708 codegen_->CreateUnresolvedFieldLocationSummary(
5709 instruction, instruction->GetFieldType(), calling_convention);
5710 }
5711
5712 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldSet(
5713 HUnresolvedStaticFieldSet* instruction) {
5714 FieldAccessCallingConventionX86_64 calling_convention;
5715 codegen_->GenerateUnresolvedFieldAccess(instruction,
5716 instruction->GetFieldType(),
5717 instruction->GetFieldIndex(),
5718 instruction->GetDexPc(),
5719 calling_convention);
5720 }
5721
5722 void LocationsBuilderX86_64::VisitNullCheck(HNullCheck* instruction) {
5723 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
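  // Implicit null checks need the object in a register so a faulting memory access can be
  // emitted; explicit checks can test the value wherever it happens to live.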
5724 Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks()
5725 ? Location::RequiresRegister()
5726 : Location::Any();
5727 locations->SetInAt(0, loc);
5728 }
5729
5730 void CodeGeneratorX86_64::GenerateImplicitNullCheck(HNullCheck* instruction) {
5731 if (CanMoveNullCheckToUser(instruction)) {
5732 return;
5733 }
5734 LocationSummary* locations = instruction->GetLocations();
5735 Location obj = locations->InAt(0);
5736
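  // Implicit null check: the test below reads from `obj`, so a null object faults and the
  // runtime's fault handler turns the fault into a NullPointerException.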
5737 __ testl(CpuRegister(RAX), Address(obj.AsRegister<CpuRegister>(), 0));
5738 RecordPcInfo(instruction, instruction->GetDexPc());
5739 }
5740
5741 void CodeGeneratorX86_64::GenerateExplicitNullCheck(HNullCheck* instruction) {
5742 SlowPathCode* slow_path = new (GetScopedAllocator()) NullCheckSlowPathX86_64(instruction);
5743 AddSlowPath(slow_path);
5744
5745 LocationSummary* locations = instruction->GetLocations();
5746 Location obj = locations->InAt(0);
5747
5748 if (obj.IsRegister()) {
5749 __ testl(obj.AsRegister<CpuRegister>(), obj.AsRegister<CpuRegister>());
5750 } else if (obj.IsStackSlot()) {
5751 __ cmpl(Address(CpuRegister(RSP), obj.GetStackIndex()), Immediate(0));
5752 } else {
5753 DCHECK(obj.IsConstant()) << obj;
5754 DCHECK(obj.GetConstant()->IsNullConstant());
5755 __ jmp(slow_path->GetEntryLabel());
5756 return;
5757 }
5758 __ j(kEqual, slow_path->GetEntryLabel());
5759 }
5760
5761 void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) {
5762 codegen_->GenerateNullCheck(instruction);
5763 }
5764
5765 void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) {
5766 bool object_array_get_with_read_barrier =
5767 (instruction->GetType() == DataType::Type::kReference) && codegen_->EmitReadBarrier();
5768 LocationSummary* locations =
5769 new (GetGraph()->GetAllocator()) LocationSummary(instruction,
5770 object_array_get_with_read_barrier
5771 ? LocationSummary::kCallOnSlowPath
5772 : LocationSummary::kNoCall);
5773 if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
5774 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
5775 }
5776 locations->SetInAt(0, Location::RequiresRegister());
5777 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5778 if (DataType::IsFloatingPointType(instruction->GetType())) {
5779 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5780 } else {
5781 // The output overlaps for an object array get when read barriers
5782 // are enabled: we do not want the move to overwrite the array's
5783 // location, as we need it to emit the read barrier.
5784 locations->SetOut(
5785 Location::RequiresRegister(),
5786 object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
5787 }
5788 }
5789
5790 void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
5791 LocationSummary* locations = instruction->GetLocations();
5792 Location obj_loc = locations->InAt(0);
5793 CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
5794 Location index = locations->InAt(1);
5795 Location out_loc = locations->Out();
5796 uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
5797
5798 DataType::Type type = instruction->GetType();
5799 if (type == DataType::Type::kReference) {
5800 static_assert(
5801 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
5802 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
5803 // /* HeapReference<Object> */ out =
5804 // *(obj + data_offset + index * sizeof(HeapReference<Object>))
5805 if (codegen_->EmitBakerReadBarrier()) {
5806 // Note that a potential implicit null check is handled in this
5807 // CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call.
5808 codegen_->GenerateArrayLoadWithBakerReadBarrier(
5809 instruction, out_loc, obj, data_offset, index, /* needs_null_check= */ true);
5810 } else {
5811 CpuRegister out = out_loc.AsRegister<CpuRegister>();
5812 __ movl(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
5813 codegen_->MaybeRecordImplicitNullCheck(instruction);
5814 // If read barriers are enabled, emit read barriers other than
5815 // Baker's using a slow path (and also unpoison the loaded
5816 // reference, if heap poisoning is enabled).
5817 if (index.IsConstant()) {
5818 uint32_t offset =
5819 (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
5820 codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
5821 } else {
5822 codegen_->MaybeGenerateReadBarrierSlow(
5823 instruction, out_loc, out_loc, obj_loc, data_offset, index);
5824 }
5825 }
5826 } else {
5827 if (type == DataType::Type::kUint16
5828 && mirror::kUseStringCompression
5829 && instruction->IsStringCharAt()) {
5830       // Branch into the compressed and uncompressed cases based on the string's compression flag.
5831 CpuRegister out = out_loc.AsRegister<CpuRegister>();
5832 uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
5833 NearLabel done, not_compressed;
5834 __ testb(Address(obj, count_offset), Immediate(1));
5835 codegen_->MaybeRecordImplicitNullCheck(instruction);
5836 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
5837 "Expecting 0=compressed, 1=uncompressed");
5838       __ j(kNotZero, &not_compressed);
5839 __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
5840 __ jmp(&done);
5841       __ Bind(&not_compressed);
5842 __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
5843 __ Bind(&done);
5844 } else {
5845 ScaleFactor scale = CodeGenerator::ScaleFactorForType(type);
5846 Address src = CodeGeneratorX86_64::ArrayAddress(obj, index, scale, data_offset);
5847 codegen_->LoadFromMemoryNoReference(type, out_loc, src);
5848 }
5849 codegen_->MaybeRecordImplicitNullCheck(instruction);
5850 }
5851 }
5852
5853 void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) {
5854 DataType::Type value_type = instruction->GetComponentType();
5855
5856 WriteBarrierKind write_barrier_kind = instruction->GetWriteBarrierKind();
5857 bool needs_write_barrier =
5858 codegen_->StoreNeedsWriteBarrier(value_type, instruction->GetValue(), write_barrier_kind);
5859 bool check_gc_card =
5860 codegen_->ShouldCheckGCCard(value_type, instruction->GetValue(), write_barrier_kind);
5861 bool needs_type_check = instruction->NeedsTypeCheck();
5862
5863 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5864 instruction,
5865 needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
5866
5867 locations->SetInAt(0, Location::RequiresRegister());
5868 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5869 if (DataType::IsFloatingPointType(value_type)) {
5870 locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
5871 } else {
5872 locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
5873 }
5874
5875 if (needs_write_barrier || check_gc_card) {
5876     // Used for reference poisoning, type checking, emitting the write barrier, or checking
5877     // the write barrier (i.e. validating the GC card).
5878 locations->AddTemp(Location::RequiresRegister());
5879 // Only used when emitting a write barrier, or when checking for the card table.
5880 locations->AddTemp(Location::RequiresRegister());
5881 } else if ((kPoisonHeapReferences && value_type == DataType::Type::kReference) ||
5882 instruction->NeedsTypeCheck()) {
5883 // Used for poisoning or type checking.
5884 locations->AddTemp(Location::RequiresRegister());
5885 }
5886 }
5887
5888 void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
5889 LocationSummary* locations = instruction->GetLocations();
5890 Location array_loc = locations->InAt(0);
5891 CpuRegister array = array_loc.AsRegister<CpuRegister>();
5892 Location index = locations->InAt(1);
5893 Location value = locations->InAt(2);
5894 DataType::Type value_type = instruction->GetComponentType();
5895 bool needs_type_check = instruction->NeedsTypeCheck();
5896 const WriteBarrierKind write_barrier_kind = instruction->GetWriteBarrierKind();
5897 bool needs_write_barrier =
5898 codegen_->StoreNeedsWriteBarrier(value_type, instruction->GetValue(), write_barrier_kind);
5899
5900 switch (value_type) {
5901 case DataType::Type::kBool:
5902 case DataType::Type::kUint8:
5903 case DataType::Type::kInt8: {
5904 uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
5905 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_1, offset);
5906 if (value.IsRegister()) {
5907 __ movb(address, value.AsRegister<CpuRegister>());
5908 } else {
5909 __ movb(address, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
5910 }
5911 codegen_->MaybeRecordImplicitNullCheck(instruction);
5912 break;
5913 }
5914
5915 case DataType::Type::kUint16:
5916 case DataType::Type::kInt16: {
5917 uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
5918 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_2, offset);
5919 if (value.IsRegister()) {
5920 __ movw(address, value.AsRegister<CpuRegister>());
5921 } else {
5922 DCHECK(value.IsConstant()) << value;
5923 __ movw(address, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
5924 }
5925 codegen_->MaybeRecordImplicitNullCheck(instruction);
5926 break;
5927 }
5928
5929 case DataType::Type::kReference: {
5930 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
5931 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
5932
5933 if (!value.IsRegister()) {
5934 // Just setting null.
5935 DCHECK(instruction->InputAt(2)->IsNullConstant());
5936 DCHECK(value.IsConstant()) << value;
5937 __ movl(address, Immediate(0));
5938 codegen_->MaybeRecordImplicitNullCheck(instruction);
5939 if (write_barrier_kind == WriteBarrierKind::kEmitBeingReliedOn) {
5940 // We need to set a write barrier here even though we are writing null, since this write
5941 // barrier is being relied on.
5942 DCHECK(needs_write_barrier);
5943 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
5944 CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
5945 codegen_->MarkGCCard(temp, card, array);
5946 }
5947 DCHECK(!needs_type_check);
5948 break;
5949 }
5950
5951 CpuRegister register_value = value.AsRegister<CpuRegister>();
5952 const bool can_value_be_null = instruction->GetValueCanBeNull();
5953 // The WriteBarrierKind::kEmitNotBeingReliedOn case is able to skip the write barrier when its
5954 // value is null (without an extra CompareAndBranchIfZero since we already checked if the
5955 // value is null for the type check).
5956 const bool skip_marking_gc_card =
5957 can_value_be_null && write_barrier_kind == WriteBarrierKind::kEmitNotBeingReliedOn;
5958 NearLabel do_store;
5959 NearLabel skip_writing_card;
5960 if (can_value_be_null) {
5961 __ testl(register_value, register_value);
5962 if (skip_marking_gc_card) {
5963 __ j(kEqual, &skip_writing_card);
5964 } else {
5965 __ j(kEqual, &do_store);
5966 }
5967 }
5968
5969 SlowPathCode* slow_path = nullptr;
5970 if (needs_type_check) {
5971 slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathX86_64(instruction);
5972 codegen_->AddSlowPath(slow_path);
5973
5974 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
5975 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
5976 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
5977
5978 // Note that when Baker read barriers are enabled, the type
5979 // checks are performed without read barriers. This is fine,
5980 // even in the case where a class object is in the from-space
5981 // after the flip, as a comparison involving such a type would
5982 // not produce a false positive; it may of course produce a
5983 // false negative, in which case we would take the ArraySet
5984 // slow path.
5985
5986 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
5987 // /* HeapReference<Class> */ temp = array->klass_
5988 __ movl(temp, Address(array, class_offset));
5989 codegen_->MaybeRecordImplicitNullCheck(instruction);
5990 __ MaybeUnpoisonHeapReference(temp);
5991
5992 // /* HeapReference<Class> */ temp = temp->component_type_
5993 __ movl(temp, Address(temp, component_offset));
5994 // If heap poisoning is enabled, no need to unpoison `temp`
5995 // nor the object reference in `register_value->klass`, as
5996 // we are comparing two poisoned references.
5997 __ cmpl(temp, Address(register_value, class_offset));
5998
5999 if (instruction->StaticTypeOfArrayIsObjectArray()) {
6000 NearLabel do_put;
6001 __ j(kEqual, &do_put);
6002 // If heap poisoning is enabled, the `temp` reference has
6003 // not been unpoisoned yet; unpoison it now.
6004 __ MaybeUnpoisonHeapReference(temp);
6005
6006 // If heap poisoning is enabled, no need to unpoison the
6007 // heap reference loaded below, as it is only used for a
6008 // comparison with null.
6009 __ cmpl(Address(temp, super_offset), Immediate(0));
6010 __ j(kNotEqual, slow_path->GetEntryLabel());
6011 __ Bind(&do_put);
6012 } else {
6013 __ j(kNotEqual, slow_path->GetEntryLabel());
6014 }
6015 }
6016
6017 if (can_value_be_null && !skip_marking_gc_card) {
6018 DCHECK(do_store.IsLinked());
6019 __ Bind(&do_store);
6020 }
6021
6022 if (needs_write_barrier) {
6023 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
6024 CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
6025 codegen_->MarkGCCard(temp, card, array);
6026 } else if (codegen_->ShouldCheckGCCard(
6027 value_type, instruction->GetValue(), write_barrier_kind)) {
6028 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
6029 CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
6030 codegen_->CheckGCCardIsValid(temp, card, array);
6031 }
6032
6033 if (skip_marking_gc_card) {
6034 // Note that we don't check that the GC card is valid as it can be correctly clean.
6035 DCHECK(skip_writing_card.IsLinked());
6036 __ Bind(&skip_writing_card);
6037 }
6038
6039 Location source = value;
6040 if (kPoisonHeapReferences) {
6041 Location temp_loc = locations->GetTemp(0);
6042 CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
6043 __ movl(temp, register_value);
6044 __ PoisonHeapReference(temp);
6045 source = temp_loc;
6046 }
6047
6048 __ movl(address, source.AsRegister<CpuRegister>());
6049
6050 if (can_value_be_null || !needs_type_check) {
6051 codegen_->MaybeRecordImplicitNullCheck(instruction);
6052 }
6053
6054 if (slow_path != nullptr) {
6055 __ Bind(slow_path->GetExitLabel());
6056 }
6057
6058 break;
6059 }
6060
6061 case DataType::Type::kInt32: {
6062 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
6063 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
6064 if (value.IsRegister()) {
6065 __ movl(address, value.AsRegister<CpuRegister>());
6066 } else {
6067 DCHECK(value.IsConstant()) << value;
6068 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
6069 __ movl(address, Immediate(v));
6070 }
6071 codegen_->MaybeRecordImplicitNullCheck(instruction);
6072 break;
6073 }
6074
6075 case DataType::Type::kInt64: {
6076 uint32_t offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
6077 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset);
6078 if (value.IsRegister()) {
6079 __ movq(address, value.AsRegister<CpuRegister>());
6080 codegen_->MaybeRecordImplicitNullCheck(instruction);
6081 } else {
6082 int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
6083 Address address_high =
6084 CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t));
6085 codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
6086 }
6087 break;
6088 }
6089
6090 case DataType::Type::kFloat32: {
6091 uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
6092 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
6093 if (value.IsFpuRegister()) {
6094 __ movss(address, value.AsFpuRegister<XmmRegister>());
6095 } else {
6096 DCHECK(value.IsConstant());
6097 int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
6098 __ movl(address, Immediate(v));
6099 }
6100 codegen_->MaybeRecordImplicitNullCheck(instruction);
6101 break;
6102 }
6103
6104 case DataType::Type::kFloat64: {
6105 uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
6106 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset);
6107 if (value.IsFpuRegister()) {
6108 __ movsd(address, value.AsFpuRegister<XmmRegister>());
6109 codegen_->MaybeRecordImplicitNullCheck(instruction);
6110 } else {
6111 int64_t v = bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
6112 Address address_high =
6113 CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t));
6114 codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
6115 }
6116 break;
6117 }
6118
6119 case DataType::Type::kUint32:
6120 case DataType::Type::kUint64:
6121 case DataType::Type::kVoid:
6122 LOG(FATAL) << "Unreachable type " << instruction->GetType();
6123 UNREACHABLE();
6124 }
6125 }
6126
6127 void LocationsBuilderX86_64::VisitArrayLength(HArrayLength* instruction) {
6128 LocationSummary* locations =
6129 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
6130 locations->SetInAt(0, Location::RequiresRegister());
6131 if (!instruction->IsEmittedAtUseSite()) {
6132 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6133 }
6134 }
6135
6136 void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction) {
6137 if (instruction->IsEmittedAtUseSite()) {
6138 return;
6139 }
6140
6141 LocationSummary* locations = instruction->GetLocations();
6142 uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
6143 CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
6144 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
6145 __ movl(out, Address(obj, offset));
6146 codegen_->MaybeRecordImplicitNullCheck(instruction);
6147   // Shift out the compression flag (the least significant bit) if this is a String's char array.
6148 if (mirror::kUseStringCompression && instruction->IsStringLength()) {
6149 __ shrl(out, Immediate(1));
6150 }
6151 }
6152
6153 void LocationsBuilderX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
6154 RegisterSet caller_saves = RegisterSet::Empty();
6155 InvokeRuntimeCallingConvention calling_convention;
6156 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6157 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
6158 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
6159 locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
6160 HInstruction* length = instruction->InputAt(1);
6161 if (!length->IsEmittedAtUseSite()) {
6162 locations->SetInAt(1, Location::RegisterOrConstant(length));
6163 }
6164 }
6165
6166 void InstructionCodeGeneratorX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
6167 LocationSummary* locations = instruction->GetLocations();
6168 Location index_loc = locations->InAt(0);
6169 Location length_loc = locations->InAt(1);
6170 SlowPathCode* slow_path =
6171 new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathX86_64(instruction);
6172
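  // If both the index and the length are constants, the outcome is known statically; otherwise
  // an unsigned compare-and-branch to the slow path also catches negative indices.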
6173 if (length_loc.IsConstant()) {
6174 int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
6175 if (index_loc.IsConstant()) {
6176       // BCE will remove the bounds check if we are guaranteed to pass.
6177 int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
6178 if (index < 0 || index >= length) {
6179 codegen_->AddSlowPath(slow_path);
6180 __ jmp(slow_path->GetEntryLabel());
6181 } else {
6182 // Some optimization after BCE may have generated this, and we should not
6183 // generate a bounds check if it is a valid range.
6184 }
6185 return;
6186 }
6187
6188 // We have to reverse the jump condition because the length is the constant.
6189 CpuRegister index_reg = index_loc.AsRegister<CpuRegister>();
6190 __ cmpl(index_reg, Immediate(length));
6191 codegen_->AddSlowPath(slow_path);
6192 __ j(kAboveEqual, slow_path->GetEntryLabel());
6193 } else {
6194 HInstruction* array_length = instruction->InputAt(1);
6195 if (array_length->IsEmittedAtUseSite()) {
6196 // Address the length field in the array.
6197 DCHECK(array_length->IsArrayLength());
6198 uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
6199 Location array_loc = array_length->GetLocations()->InAt(0);
6200 Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
6201 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
6202 // TODO: if index_loc.IsConstant(), compare twice the index (to compensate for
6203 // the string compression flag) with the in-memory length and avoid the temporary.
6204 CpuRegister length_reg = CpuRegister(TMP);
6205 __ movl(length_reg, array_len);
6206 codegen_->MaybeRecordImplicitNullCheck(array_length);
6207 __ shrl(length_reg, Immediate(1));
6208 codegen_->GenerateIntCompare(length_reg, index_loc);
6209 } else {
6210         // Checking the bound for the general case:
6211         // an array of char, or a String's char array when the compression feature is off.
6212 if (index_loc.IsConstant()) {
6213 int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
6214 __ cmpl(array_len, Immediate(value));
6215 } else {
6216 __ cmpl(array_len, index_loc.AsRegister<CpuRegister>());
6217 }
6218 codegen_->MaybeRecordImplicitNullCheck(array_length);
6219 }
6220 } else {
6221 codegen_->GenerateIntCompare(length_loc, index_loc);
6222 }
6223 codegen_->AddSlowPath(slow_path);
6224 __ j(kBelowEqual, slow_path->GetEntryLabel());
6225 }
6226 }
6227
6228 void CodeGeneratorX86_64::MaybeMarkGCCard(CpuRegister temp,
6229 CpuRegister card,
6230 CpuRegister object,
6231 CpuRegister value,
6232 bool emit_null_check) {
6233 NearLabel is_null;
6234 if (emit_null_check) {
6235 __ testl(value, value);
6236 __ j(kEqual, &is_null);
6237 }
6238 MarkGCCard(temp, card, object);
6239 if (emit_null_check) {
6240 __ Bind(&is_null);
6241 }
6242 }
6243
6244 void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp, CpuRegister card, CpuRegister object) {
6245 // Load the address of the card table into `card`.
6246 __ gs()->movq(card,
6247 Address::Absolute(Thread::CardTableOffset<kX86_64PointerSize>().Int32Value(),
6248 /* no_rip= */ true));
6249 // Calculate the offset (in the card table) of the card corresponding to `object`.
6250 __ movq(temp, object);
6251 __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift));
6252 // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
6253 // `object`'s card.
6254 //
6255 // Register `card` contains the address of the card table. Note that the card
6256 // table's base is biased during its creation so that it always starts at an
6257 // address whose least-significant byte is equal to `kCardDirty` (see
6258 // art::gc::accounting::CardTable::Create). Therefore the MOVB instruction
6259 // below writes the `kCardDirty` (byte) value into the `object`'s card
6260 // (located at `card + object >> kCardShift`).
6261 //
6262 // This dual use of the value in register `card` (1. to calculate the location
6263 // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
6264 // (no need to explicitly load `kCardDirty` as an immediate value).
6265 __ movb(Address(temp, card, TIMES_1, 0), card);
6266 }
6267
6268 void CodeGeneratorX86_64::CheckGCCardIsValid(CpuRegister temp,
6269 CpuRegister card,
6270 CpuRegister object) {
6271 NearLabel done;
6272 // Load the address of the card table into `card`.
6273 __ gs()->movq(card,
6274 Address::Absolute(Thread::CardTableOffset<kX86_64PointerSize>().Int32Value(),
6275 /* no_rip= */ true));
6276 // Calculate the offset (in the card table) of the card corresponding to `object`.
6277 __ movq(temp, object);
6278 __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift));
6279 // assert (!clean || !self->is_gc_marking)
6280 __ cmpb(Address(temp, card, TIMES_1, 0), Immediate(gc::accounting::CardTable::kCardClean));
6281 __ j(kNotEqual, &done);
6282 __ gs()->cmpl(
6283 Address::Absolute(Thread::IsGcMarkingOffset<kX86_64PointerSize>(), /* no_rip= */ true),
6284 Immediate(0));
6285 __ j(kEqual, &done);
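  // The card is clean while the GC is marking, which violates the assertion above; trap here to
  // make the missing card mark visible.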
6286 __ int3();
6287 __ Bind(&done);
6288 }
6289
6290 void LocationsBuilderX86_64::VisitParallelMove([[maybe_unused]] HParallelMove* instruction) {
6291 LOG(FATAL) << "Unimplemented";
6292 }
6293
6294 void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instruction) {
6295 if (instruction->GetNext()->IsSuspendCheck() &&
6296 instruction->GetBlock()->GetLoopInformation() != nullptr) {
6297 HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
6298 // The back edge will generate the suspend check.
6299 codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
6300 }
6301
6302 codegen_->GetMoveResolver()->EmitNativeCode(instruction);
6303 }
6304
6305 void LocationsBuilderX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
6306 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6307 instruction, LocationSummary::kCallOnSlowPath);
6308 // In suspend check slow path, usually there are no caller-save registers at all.
6309 // If SIMD instructions are present, however, we force spilling all live SIMD
6310   // registers in full width (since the runtime only saves/restores the lower part).
6311 locations->SetCustomSlowPathCallerSaves(
6312 GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
6313 }
6314
6315 void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
6316 HBasicBlock* block = instruction->GetBlock();
6317 if (block->GetLoopInformation() != nullptr) {
6318 DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
6319 // The back edge will generate the suspend check.
6320 return;
6321 }
6322 if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
6323 // The goto will generate the suspend check.
6324 return;
6325 }
6326 GenerateSuspendCheck(instruction, nullptr);
6327 }
6328
6329 void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruction,
6330 HBasicBlock* successor) {
6331 SuspendCheckSlowPathX86_64* slow_path =
6332 down_cast<SuspendCheckSlowPathX86_64*>(instruction->GetSlowPath());
6333 if (slow_path == nullptr) {
6334 slow_path =
6335 new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathX86_64(instruction, successor);
6336 instruction->SetSlowPath(slow_path);
6337 codegen_->AddSlowPath(slow_path);
6338 if (successor != nullptr) {
6339 DCHECK(successor->IsLoopHeader());
6340 }
6341 } else {
6342 DCHECK_EQ(slow_path->GetSuccessor(), successor);
6343 }
6344
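  // Test the thread's flag word for a pending suspend or checkpoint request.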
6345 __ gs()->testl(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64PointerSize>().Int32Value(),
6346 /* no_rip= */ true),
6347 Immediate(Thread::SuspendOrCheckpointRequestFlags()));
6348 if (successor == nullptr) {
6349 __ j(kNotZero, slow_path->GetEntryLabel());
6350 __ Bind(slow_path->GetReturnLabel());
6351 } else {
6352 __ j(kZero, codegen_->GetLabelOf(successor));
6353 __ jmp(slow_path->GetEntryLabel());
6354 }
6355 }
6356
6357 X86_64Assembler* ParallelMoveResolverX86_64::GetAssembler() const {
6358 return codegen_->GetAssembler();
6359 }
6360
6361 void ParallelMoveResolverX86_64::EmitMove(size_t index) {
6362 MoveOperands* move = moves_[index];
6363 Location source = move->GetSource();
6364 Location destination = move->GetDestination();
6365
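  // Dispatch on the source and destination kinds; memory-to-memory copies go through the TMP
  // register, and constants are materialized into the destination.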
6366 if (source.IsRegister()) {
6367 if (destination.IsRegister()) {
6368 __ movq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
6369 } else if (destination.IsStackSlot()) {
6370 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
6371 source.AsRegister<CpuRegister>());
6372 } else {
6373 DCHECK(destination.IsDoubleStackSlot());
6374 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
6375 source.AsRegister<CpuRegister>());
6376 }
6377 } else if (source.IsStackSlot()) {
6378 if (destination.IsRegister()) {
6379 __ movl(destination.AsRegister<CpuRegister>(),
6380 Address(CpuRegister(RSP), source.GetStackIndex()));
6381 } else if (destination.IsFpuRegister()) {
6382 __ movss(destination.AsFpuRegister<XmmRegister>(),
6383 Address(CpuRegister(RSP), source.GetStackIndex()));
6384 } else {
6385 DCHECK(destination.IsStackSlot());
6386 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
6387 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
6388 }
6389 } else if (source.IsDoubleStackSlot()) {
6390 if (destination.IsRegister()) {
6391 __ movq(destination.AsRegister<CpuRegister>(),
6392 Address(CpuRegister(RSP), source.GetStackIndex()));
6393 } else if (destination.IsFpuRegister()) {
6394 __ movsd(destination.AsFpuRegister<XmmRegister>(),
6395 Address(CpuRegister(RSP), source.GetStackIndex()));
6396 } else {
6397 DCHECK(destination.IsDoubleStackSlot()) << destination;
6398 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
6399 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
6400 }
6401 } else if (source.IsSIMDStackSlot()) {
6402 if (destination.IsFpuRegister()) {
6403 __ movups(destination.AsFpuRegister<XmmRegister>(),
6404 Address(CpuRegister(RSP), source.GetStackIndex()));
6405 } else {
6406 DCHECK(destination.IsSIMDStackSlot());
6407 size_t high = kX86_64WordSize;
6408 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
6409 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
6410 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex() + high));
6411 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex() + high), CpuRegister(TMP));
6412 }
6413 } else if (source.IsConstant()) {
6414 HConstant* constant = source.GetConstant();
6415 if (constant->IsIntConstant() || constant->IsNullConstant()) {
6416 int32_t value = CodeGenerator::GetInt32ValueOf(constant);
6417 if (destination.IsRegister()) {
6418 if (value == 0) {
6419 __ xorl(destination.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
6420 } else {
6421 __ movl(destination.AsRegister<CpuRegister>(), Immediate(value));
6422 }
6423 } else {
6424 DCHECK(destination.IsStackSlot()) << destination;
6425 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
6426 }
6427 } else if (constant->IsLongConstant()) {
6428 int64_t value = constant->AsLongConstant()->GetValue();
6429 if (destination.IsRegister()) {
6430 codegen_->Load64BitValue(destination.AsRegister<CpuRegister>(), value);
6431 } else {
6432 DCHECK(destination.IsDoubleStackSlot()) << destination;
6433 codegen_->Store64BitValueToStack(destination, value);
6434 }
6435 } else if (constant->IsFloatConstant()) {
6436 float fp_value = constant->AsFloatConstant()->GetValue();
6437 if (destination.IsFpuRegister()) {
6438 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
6439 codegen_->Load32BitValue(dest, fp_value);
6440 } else {
6441 DCHECK(destination.IsStackSlot()) << destination;
6442 Immediate imm(bit_cast<int32_t, float>(fp_value));
6443 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
6444 }
6445 } else {
6446 DCHECK(constant->IsDoubleConstant()) << constant->DebugName();
6447 double fp_value = constant->AsDoubleConstant()->GetValue();
6448 int64_t value = bit_cast<int64_t, double>(fp_value);
6449 if (destination.IsFpuRegister()) {
6450 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
6451 codegen_->Load64BitValue(dest, fp_value);
6452 } else {
6453 DCHECK(destination.IsDoubleStackSlot()) << destination;
6454 codegen_->Store64BitValueToStack(destination, value);
6455 }
6456 }
6457 } else if (source.IsFpuRegister()) {
6458 if (destination.IsFpuRegister()) {
6459 __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
6460 } else if (destination.IsStackSlot()) {
6461 __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
6462 source.AsFpuRegister<XmmRegister>());
6463 } else if (destination.IsDoubleStackSlot()) {
6464 __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
6465 source.AsFpuRegister<XmmRegister>());
6466 } else {
6467 DCHECK(destination.IsSIMDStackSlot());
6468 __ movups(Address(CpuRegister(RSP), destination.GetStackIndex()),
6469 source.AsFpuRegister<XmmRegister>());
6470 }
6471 }
6472 }
6473
6474 void ParallelMoveResolverX86_64::Exchange32(CpuRegister reg, int mem) {
6475 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
6476 __ movl(Address(CpuRegister(RSP), mem), reg);
6477 __ movl(reg, CpuRegister(TMP));
6478 }
6479
6480 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg1, CpuRegister reg2) {
6481 __ movq(CpuRegister(TMP), reg1);
6482 __ movq(reg1, reg2);
6483 __ movq(reg2, CpuRegister(TMP));
6484 }
6485
6486 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) {
6487 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
6488 __ movq(Address(CpuRegister(RSP), mem), reg);
6489 __ movq(reg, CpuRegister(TMP));
6490 }
6491
6492 void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) {
6493 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
6494 __ movss(Address(CpuRegister(RSP), mem), reg);
6495 __ movd(reg, CpuRegister(TMP));
6496 }
6497
6498 void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) {
6499 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
6500 __ movsd(Address(CpuRegister(RSP), mem), reg);
6501 __ movd(reg, CpuRegister(TMP));
6502 }
6503
6504 void ParallelMoveResolverX86_64::Exchange128(XmmRegister reg, int mem) {
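  // Spill the 128-bit register to a freshly reserved stack area, swap that area with `mem` as
  // two 64-bit chunks, then reload the register from the swapped contents.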
6505 size_t extra_slot = 2 * kX86_64WordSize;
6506 __ subq(CpuRegister(RSP), Immediate(extra_slot));
6507 __ movups(Address(CpuRegister(RSP), 0), XmmRegister(reg));
6508 ExchangeMemory64(0, mem + extra_slot, 2);
6509 __ movups(XmmRegister(reg), Address(CpuRegister(RSP), 0));
6510 __ addq(CpuRegister(RSP), Immediate(extra_slot));
6511 }
6512
6513 void ParallelMoveResolverX86_64::ExchangeMemory32(int mem1, int mem2) {
6514 ScratchRegisterScope ensure_scratch(
6515 this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
6516
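  // If the scratch register had to be spilled, the push moved RSP down one word, so adjust the
  // incoming stack offsets accordingly.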
6517 int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
6518 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
6519 __ movl(CpuRegister(ensure_scratch.GetRegister()),
6520 Address(CpuRegister(RSP), mem2 + stack_offset));
6521 __ movl(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
6522 __ movl(Address(CpuRegister(RSP), mem1 + stack_offset),
6523 CpuRegister(ensure_scratch.GetRegister()));
6524 }
6525
6526 void ParallelMoveResolverX86_64::ExchangeMemory64(int mem1, int mem2, int num_of_qwords) {
6527 ScratchRegisterScope ensure_scratch(
6528 this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
6529
6530 int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
6531
6532 // Now that temp registers are available (possibly spilled), exchange blocks of memory.
6533 for (int i = 0; i < num_of_qwords; i++) {
6534 __ movq(CpuRegister(TMP),
6535 Address(CpuRegister(RSP), mem1 + stack_offset));
6536 __ movq(CpuRegister(ensure_scratch.GetRegister()),
6537 Address(CpuRegister(RSP), mem2 + stack_offset));
6538 __ movq(Address(CpuRegister(RSP), mem2 + stack_offset),
6539 CpuRegister(TMP));
6540 __ movq(Address(CpuRegister(RSP), mem1 + stack_offset),
6541 CpuRegister(ensure_scratch.GetRegister()));
6542 stack_offset += kX86_64WordSize;
6543 }
6544 }
6545
6546 void ParallelMoveResolverX86_64::EmitSwap(size_t index) {
6547 MoveOperands* move = moves_[index];
6548 Location source = move->GetSource();
6549 Location destination = move->GetDestination();
6550
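  // Resolve the swap pairwise by location kind: register and FP-register pairs go through TMP,
  // while memory-to-memory pairs use the ExchangeMemory helpers.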
6551 if (source.IsRegister() && destination.IsRegister()) {
6552 Exchange64(source.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
6553 } else if (source.IsRegister() && destination.IsStackSlot()) {
6554 Exchange32(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
6555 } else if (source.IsStackSlot() && destination.IsRegister()) {
6556 Exchange32(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
6557 } else if (source.IsStackSlot() && destination.IsStackSlot()) {
6558 ExchangeMemory32(destination.GetStackIndex(), source.GetStackIndex());
6559 } else if (source.IsRegister() && destination.IsDoubleStackSlot()) {
6560 Exchange64(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
6561 } else if (source.IsDoubleStackSlot() && destination.IsRegister()) {
6562 Exchange64(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
6563 } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
6564 ExchangeMemory64(destination.GetStackIndex(), source.GetStackIndex(), 1);
6565 } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
6566 __ movd(CpuRegister(TMP), source.AsFpuRegister<XmmRegister>());
6567 __ movaps(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>());
6568 __ movd(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP));
6569 } else if (source.IsFpuRegister() && destination.IsStackSlot()) {
6570 Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
6571 } else if (source.IsStackSlot() && destination.IsFpuRegister()) {
6572 Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
6573 } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) {
6574 Exchange64(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
6575 } else if (source.IsDoubleStackSlot() && destination.IsFpuRegister()) {
6576 Exchange64(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
6577 } else if (source.IsSIMDStackSlot() && destination.IsSIMDStackSlot()) {
6578 ExchangeMemory64(destination.GetStackIndex(), source.GetStackIndex(), 2);
6579 } else if (source.IsFpuRegister() && destination.IsSIMDStackSlot()) {
6580 Exchange128(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
6581 } else if (destination.IsFpuRegister() && source.IsSIMDStackSlot()) {
6582 Exchange128(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
6583 } else {
6584 LOG(FATAL) << "Unimplemented swap between " << source << " and " << destination;
6585 }
6586 }
6587
6588
6589 void ParallelMoveResolverX86_64::SpillScratch(int reg) {
6590 __ pushq(CpuRegister(reg));
6591 }
6592
6593
6594 void ParallelMoveResolverX86_64::RestoreScratch(int reg) {
6595 __ popq(CpuRegister(reg));
6596 }
6597
6598 void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck(
6599 SlowPathCode* slow_path, CpuRegister class_reg) {
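  // The class status byte is compared against the visibly-initialized marker; anything below it
  // still needs initialization and is sent to the slow path.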
6600 __ cmpb(Address(class_reg, kClassStatusByteOffset), Immediate(kShiftedVisiblyInitializedValue));
6601 __ j(kBelow, slow_path->GetEntryLabel());
6602 __ Bind(slow_path->GetExitLabel());
6603 }
6604
6605 void InstructionCodeGeneratorX86_64::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
6606 CpuRegister temp) {
6607 uint32_t path_to_root = check->GetBitstringPathToRoot();
6608 uint32_t mask = check->GetBitstringMask();
6609 DCHECK(IsPowerOfTwo(mask + 1));
6610 size_t mask_bits = WhichPowerOf2(mask + 1);
6611
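  // Either form leaves the zero flag set exactly when the object's bitstring matches
  // `path_to_root` under `mask`, which is what the caller branches on.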
6612 if (mask_bits == 16u) {
6613 // Compare the bitstring in memory.
6614 __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root));
6615 } else {
6616 // /* uint32_t */ temp = temp->status_
6617 __ movl(temp, Address(temp, mirror::Class::StatusOffset()));
6618 // Compare the bitstring bits using SUB.
6619 __ subl(temp, Immediate(path_to_root));
6620 // Shift out bits that do not contribute to the comparison.
6621 __ shll(temp, Immediate(32u - mask_bits));
6622 }
6623 }
6624
6625 HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind(
6626 HLoadClass::LoadKind desired_class_load_kind) {
6627 switch (desired_class_load_kind) {
6628 case HLoadClass::LoadKind::kInvalid:
6629 LOG(FATAL) << "UNREACHABLE";
6630 UNREACHABLE();
6631 case HLoadClass::LoadKind::kReferrersClass:
6632 break;
6633 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
6634 case HLoadClass::LoadKind::kBootImageRelRo:
6635 case HLoadClass::LoadKind::kAppImageRelRo:
6636 case HLoadClass::LoadKind::kBssEntry:
6637 case HLoadClass::LoadKind::kBssEntryPublic:
6638 case HLoadClass::LoadKind::kBssEntryPackage:
6639 DCHECK(!GetCompilerOptions().IsJitCompiler());
6640 break;
6641 case HLoadClass::LoadKind::kJitBootImageAddress:
6642 case HLoadClass::LoadKind::kJitTableAddress:
6643 DCHECK(GetCompilerOptions().IsJitCompiler());
6644 break;
6645 case HLoadClass::LoadKind::kRuntimeCall:
6646 break;
6647 }
6648 return desired_class_load_kind;
6649 }
6650
6651 void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
6652 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
6653 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
6654 // Custom calling convention: RAX serves as both input and output.
6655 CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
6656 cls,
6657 Location::RegisterLocation(RAX),
6658 Location::RegisterLocation(RAX));
6659 return;
6660 }
6661 DCHECK_EQ(cls->NeedsAccessCheck(),
6662 load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
6663 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
6664
6665 const bool requires_read_barrier = !cls->IsInImage() && codegen_->EmitReadBarrier();
6666 LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
6667 ? LocationSummary::kCallOnSlowPath
6668 : LocationSummary::kNoCall;
6669 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind);
6670 if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
6671 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
6672 }
6673
6674 if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
6675 locations->SetInAt(0, Location::RequiresRegister());
6676 }
6677 locations->SetOut(Location::RequiresRegister());
6678 if (load_kind == HLoadClass::LoadKind::kBssEntry ||
6679 load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
6680 load_kind == HLoadClass::LoadKind::kBssEntryPackage) {
6681 if (codegen_->EmitNonBakerReadBarrier()) {
6682 // For non-Baker read barrier we have a temp-clobbering call.
6683 } else {
6684 // Rely on the type resolution and/or initialization to save everything.
6685 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6686 }
6687 }
6688 }
6689
6690 Label* CodeGeneratorX86_64::NewJitRootClassPatch(const DexFile& dex_file,
6691 dex::TypeIndex type_index,
6692 Handle<mirror::Class> handle) {
6693 ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
6694 // Add a patch entry and return the label.
6695 jit_class_patches_.emplace_back(&dex_file, type_index.index_);
6696 PatchInfo<Label>* info = &jit_class_patches_.back();
6697 return &info->label;
6698 }
6699
6700 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
6701 // move.
6702 void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
6703 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
6704 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
6705 codegen_->GenerateLoadClassRuntimeCall(cls);
6706 return;
6707 }
6708 DCHECK_EQ(cls->NeedsAccessCheck(),
6709 load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
6710 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
6711
6712 LocationSummary* locations = cls->GetLocations();
6713 Location out_loc = locations->Out();
6714 CpuRegister out = out_loc.AsRegister<CpuRegister>();
6715
6716 const ReadBarrierOption read_barrier_option =
6717 cls->IsInImage() ? kWithoutReadBarrier : codegen_->GetCompilerReadBarrierOption();
6718 bool generate_null_check = false;
6719 switch (load_kind) {
6720 case HLoadClass::LoadKind::kReferrersClass: {
6721 DCHECK(!cls->CanCallRuntime());
6722 DCHECK(!cls->MustGenerateClinitCheck());
6723 // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
6724 CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
6725 GenerateGcRootFieldLoad(
6726 cls,
6727 out_loc,
6728 Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()),
6729 /* fixup_label= */ nullptr,
6730 read_barrier_option);
6731 break;
6732 }
6733 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
6734 DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
6735 codegen_->GetCompilerOptions().IsBootImageExtension());
6736 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
6737 __ leal(out,
6738 Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
6739 codegen_->RecordBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
6740 break;
6741 case HLoadClass::LoadKind::kBootImageRelRo: {
6742 DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
6743 __ movl(out,
6744 Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
6745 codegen_->RecordBootImageRelRoPatch(CodeGenerator::GetBootImageOffset(cls));
6746 break;
6747 }
6748 case HLoadClass::LoadKind::kAppImageRelRo: {
6749 DCHECK(codegen_->GetCompilerOptions().IsAppImage());
6750 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
6751 __ movl(out,
6752 Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
6753 codegen_->RecordAppImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
6754 break;
6755 }
6756 case HLoadClass::LoadKind::kBssEntry:
6757 case HLoadClass::LoadKind::kBssEntryPublic:
6758 case HLoadClass::LoadKind::kBssEntryPackage: {
6759 Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
6760 /* no_rip= */ false);
6761 Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls);
6762 // /* GcRoot<mirror::Class> */ out = *address /* PC-relative */
6763 GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
6764 // No need for memory fence, thanks to the x86-64 memory model.
6765 generate_null_check = true;
6766 break;
6767 }
6768 case HLoadClass::LoadKind::kJitBootImageAddress: {
6769 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
6770 uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get());
6771 DCHECK_NE(address, 0u);
6772 __ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended.
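      // Heap references in ART are 32 bits, so the class object's address is known to fit
      // in a zero-extended 32-bit immediate.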
6773 break;
6774 }
6775 case HLoadClass::LoadKind::kJitTableAddress: {
6776 Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
6777 /* no_rip= */ true);
6778 Label* fixup_label =
6779 codegen_->NewJitRootClassPatch(cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass());
6780 // /* GcRoot<mirror::Class> */ out = *address
6781 GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
6782 break;
6783 }
6784 default:
6785 LOG(FATAL) << "Unexpected load kind: " << cls->GetLoadKind();
6786 UNREACHABLE();
6787 }
6788
6789 if (generate_null_check || cls->MustGenerateClinitCheck()) {
6790 DCHECK(cls->CanCallRuntime());
6791 SlowPathCode* slow_path =
6792 new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64(cls, cls);
6793 codegen_->AddSlowPath(slow_path);
6794 if (generate_null_check) {
6795 __ testl(out, out);
6796 __ j(kEqual, slow_path->GetEntryLabel());
6797 }
6798 if (cls->MustGenerateClinitCheck()) {
6799 GenerateClassInitializationCheck(slow_path, out);
6800 } else {
6801 __ Bind(slow_path->GetExitLabel());
6802 }
6803 }
6804 }
6805
void LocationsBuilderX86_64::VisitClinitCheck(HClinitCheck* check) {
6807 LocationSummary* locations =
6808 new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
6809 locations->SetInAt(0, Location::RequiresRegister());
6810 if (check->HasUses()) {
6811 locations->SetOut(Location::SameAsFirstInput());
6812 }
6813 // Rely on the type initialization to save everything we need.
6814 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6815 }
6816
void LocationsBuilderX86_64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
6818 // Custom calling convention: RAX serves as both input and output.
6819 Location location = Location::RegisterLocation(RAX);
6820 CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location);
6821 }
6822
void InstructionCodeGeneratorX86_64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
6824 codegen_->GenerateLoadMethodHandleRuntimeCall(load);
6825 }
6826
void LocationsBuilderX86_64::VisitLoadMethodType(HLoadMethodType* load) {
6828 LocationSummary* locations =
6829 new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kCallOnSlowPath);
6830 if (load->GetLoadKind() == HLoadMethodType::LoadKind::kRuntimeCall) {
6831 Location location = Location::RegisterLocation(RAX);
6832 CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location);
6833 } else {
6834 DCHECK_EQ(load->GetLoadKind(), HLoadMethodType::LoadKind::kBssEntry);
6835 locations->SetOut(Location::RequiresRegister());
6836 if (codegen_->EmitNonBakerReadBarrier()) {
6837 // For non-Baker read barrier we have a temp-clobbering call.
6838 } else {
6839 // Rely on the pResolveMethodType to save everything.
6840 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6841 }
6842 }
6843 }
6844
void InstructionCodeGeneratorX86_64::VisitLoadMethodType(HLoadMethodType* load) {
6846 LocationSummary* locations = load->GetLocations();
6847 Location out_loc = locations->Out();
6848 CpuRegister out = out_loc.AsRegister<CpuRegister>();
6849
6850 switch (load->GetLoadKind()) {
6851 case HLoadMethodType::LoadKind::kBssEntry: {
6852 Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
6853 /* no_rip= */ false);
6854 Label* fixup_label = codegen_->NewMethodTypeBssEntryPatch(load);
6855 // /* GcRoot<mirror::MethodType> */ out = *address /* PC-relative */
6856 GenerateGcRootFieldLoad(
6857 load, out_loc, address, fixup_label, codegen_->GetCompilerReadBarrierOption());
6858 // No need for memory fence, thanks to the x86-64 memory model.
6859 SlowPathCode* slow_path =
6860 new (codegen_->GetScopedAllocator()) LoadMethodTypeSlowPathX86_64(load);
6861 codegen_->AddSlowPath(slow_path);
6862 __ testl(out, out);
6863 __ j(kEqual, slow_path->GetEntryLabel());
6864 __ Bind(slow_path->GetExitLabel());
6865 return;
6866 }
6867 default:
6868 DCHECK_EQ(load->GetLoadKind(), HLoadMethodType::LoadKind::kRuntimeCall);
6869 codegen_->GenerateLoadMethodTypeRuntimeCall(load);
6870 break;
6871 }
6872 }
6873
void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) {
  // We assume the class is not null.
6876 SlowPathCode* slow_path =
6877 new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64(check->GetLoadClass(), check);
6878 codegen_->AddSlowPath(slow_path);
6879 GenerateClassInitializationCheck(slow_path,
6880 check->GetLocations()->InAt(0).AsRegister<CpuRegister>());
6881 }
6882
HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind(
    HLoadString::LoadKind desired_string_load_kind) {
6885 switch (desired_string_load_kind) {
6886 case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
6887 case HLoadString::LoadKind::kBootImageRelRo:
6888 case HLoadString::LoadKind::kBssEntry:
6889 DCHECK(!GetCompilerOptions().IsJitCompiler());
6890 break;
6891 case HLoadString::LoadKind::kJitBootImageAddress:
6892 case HLoadString::LoadKind::kJitTableAddress:
6893 DCHECK(GetCompilerOptions().IsJitCompiler());
6894 break;
6895 case HLoadString::LoadKind::kRuntimeCall:
6896 break;
6897 }
6898 return desired_string_load_kind;
6899 }
6900
void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) {
6902 LocationSummary::CallKind call_kind = codegen_->GetLoadStringCallKind(load);
6903 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
6904 if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) {
6905 locations->SetOut(Location::RegisterLocation(RAX));
6906 } else {
6907 locations->SetOut(Location::RequiresRegister());
6908 if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
6909 if (codegen_->EmitNonBakerReadBarrier()) {
6910 // For non-Baker read barrier we have a temp-clobbering call.
6911 } else {
6912 // Rely on the pResolveString to save everything.
6913 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6914 }
6915 }
6916 }
6917 }
6918
Label* CodeGeneratorX86_64::NewJitRootStringPatch(const DexFile& dex_file,
                                                  dex::StringIndex string_index,
                                                  Handle<mirror::String> handle) {
6922 ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
6923 // Add a patch entry and return the label.
6924 jit_string_patches_.emplace_back(&dex_file, string_index.index_);
6925 PatchInfo<Label>* info = &jit_string_patches_.back();
6926 return &info->label;
6927 }
6928
6929 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
6930 // move.
void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
6932 LocationSummary* locations = load->GetLocations();
6933 Location out_loc = locations->Out();
6934 CpuRegister out = out_loc.AsRegister<CpuRegister>();
6935
6936 switch (load->GetLoadKind()) {
6937 case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
6938 DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
6939 codegen_->GetCompilerOptions().IsBootImageExtension());
6940 __ leal(out,
6941 Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
6942 codegen_->RecordBootImageStringPatch(load);
6943 return;
6944 }
6945 case HLoadString::LoadKind::kBootImageRelRo: {
6946 DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
6947 __ movl(out,
6948 Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
6949 codegen_->RecordBootImageRelRoPatch(CodeGenerator::GetBootImageOffset(load));
6950 return;
6951 }
6952 case HLoadString::LoadKind::kBssEntry: {
6953 Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
6954 /* no_rip= */ false);
6955 Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
6956 // /* GcRoot<mirror::Class> */ out = *address /* PC-relative */
6957 GenerateGcRootFieldLoad(
6958 load, out_loc, address, fixup_label, codegen_->GetCompilerReadBarrierOption());
6959 // No need for memory fence, thanks to the x86-64 memory model.
6960 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86_64(load);
6961 codegen_->AddSlowPath(slow_path);
6962 __ testl(out, out);
6963 __ j(kEqual, slow_path->GetEntryLabel());
6964 __ Bind(slow_path->GetExitLabel());
6965 return;
6966 }
6967 case HLoadString::LoadKind::kJitBootImageAddress: {
6968 uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get());
6969 DCHECK_NE(address, 0u);
6970 __ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended.
6971 return;
6972 }
6973 case HLoadString::LoadKind::kJitTableAddress: {
6974 Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
6975 /* no_rip= */ true);
6976 Label* fixup_label = codegen_->NewJitRootStringPatch(
6977 load->GetDexFile(), load->GetStringIndex(), load->GetString());
6978 // /* GcRoot<mirror::String> */ out = *address
6979 GenerateGcRootFieldLoad(
6980 load, out_loc, address, fixup_label, codegen_->GetCompilerReadBarrierOption());
6981 return;
6982 }
6983 default:
6984 break;
6985 }
6986
6987 // Custom calling convention: RAX serves as both input and output.
6988 __ movl(CpuRegister(RAX), Immediate(load->GetStringIndex().index_));
6989 codegen_->InvokeRuntime(kQuickResolveString,
6990 load,
6991 load->GetDexPc());
6992 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
6993 }
6994
static Address GetExceptionTlsAddress() {
6996 return Address::Absolute(Thread::ExceptionOffset<kX86_64PointerSize>().Int32Value(),
6997 /* no_rip= */ true);
6998 }
6999
void LocationsBuilderX86_64::VisitLoadException(HLoadException* load) {
7001 LocationSummary* locations =
7002 new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall);
7003 locations->SetOut(Location::RequiresRegister());
7004 }
7005
void InstructionCodeGeneratorX86_64::VisitLoadException(HLoadException* load) {
7007 __ gs()->movl(load->GetLocations()->Out().AsRegister<CpuRegister>(), GetExceptionTlsAddress());
7008 }
7009
void LocationsBuilderX86_64::VisitClearException(HClearException* clear) {
7011 new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
7012 }
7013
void InstructionCodeGeneratorX86_64::VisitClearException([[maybe_unused]] HClearException* clear) {
7015 __ gs()->movl(GetExceptionTlsAddress(), Immediate(0));
7016 }
7017
void LocationsBuilderX86_64::VisitThrow(HThrow* instruction) {
7019 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
7020 instruction, LocationSummary::kCallOnMainOnly);
7021 InvokeRuntimeCallingConvention calling_convention;
7022 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
7023 }
7024
void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) {
7026 codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
7027 CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
7028 }
7029
7030 // Temp is used for read barrier.
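// The interface check needs one temp even without read barriers, to walk the IfTable.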
static size_t NumberOfInstanceOfTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) {
7032 if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
7033 return 1;
7034 }
7035 if (emit_read_barrier &&
7036 !kUseBakerReadBarrier &&
7037 (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
7038 type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
7039 type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
7040 return 1;
7041 }
7042 return 0;
7043 }
7044
// The interface case has two temps: one holds the number of interfaces and the other the
// current interface pointer; the current interface is compared directly in memory.
// The other checks have one temp for loading the object's class.
static size_t NumberOfCheckCastTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) {
7049 return 1 + NumberOfInstanceOfTemps(emit_read_barrier, type_check_kind);
7050 }
7051
void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
7053 LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
7054 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7055 bool baker_read_barrier_slow_path = false;
7056 switch (type_check_kind) {
7057 case TypeCheckKind::kExactCheck:
7058 case TypeCheckKind::kAbstractClassCheck:
7059 case TypeCheckKind::kClassHierarchyCheck:
7060 case TypeCheckKind::kArrayObjectCheck:
7061 case TypeCheckKind::kInterfaceCheck: {
7062 bool needs_read_barrier = codegen_->InstanceOfNeedsReadBarrier(instruction);
7063 call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
7064 baker_read_barrier_slow_path = (kUseBakerReadBarrier && needs_read_barrier) &&
7065 (type_check_kind != TypeCheckKind::kInterfaceCheck);
7066 break;
7067 }
7068 case TypeCheckKind::kArrayCheck:
7069 case TypeCheckKind::kUnresolvedCheck:
7070 call_kind = LocationSummary::kCallOnSlowPath;
7071 break;
7072 case TypeCheckKind::kBitstringCheck:
7073 break;
7074 }
7075
7076 LocationSummary* locations =
7077 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
7078 if (baker_read_barrier_slow_path) {
7079 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
7080 }
7081 locations->SetInAt(0, Location::RequiresRegister());
7082 if (type_check_kind == TypeCheckKind::kBitstringCheck) {
7083 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
7084 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
7085 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
7086 } else if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
7087 locations->SetInAt(1, Location::RequiresRegister());
7088 } else {
7089 locations->SetInAt(1, Location::Any());
7090 }
7091 // Note that TypeCheckSlowPathX86_64 uses this "out" register too.
7092 locations->SetOut(Location::RequiresRegister());
7093 locations->AddRegisterTemps(
7094 NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind));
7095 }
7096
void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
7098 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7099 LocationSummary* locations = instruction->GetLocations();
7100 Location obj_loc = locations->InAt(0);
7101 CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
7102 Location cls = locations->InAt(1);
7103 Location out_loc = locations->Out();
7104 CpuRegister out = out_loc.AsRegister<CpuRegister>();
7105 const size_t num_temps = NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind);
7106 DCHECK_LE(num_temps, 1u);
7107 Location maybe_temp_loc = (num_temps >= 1u) ? locations->GetTemp(0) : Location::NoLocation();
7108 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
7109 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
7110 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
7111 const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
7112 const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
7113 const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
7114 const uint32_t object_array_data_offset =
7115 mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
7116 SlowPathCode* slow_path = nullptr;
7117 NearLabel done, zero;
7118
7119 // Return 0 if `obj` is null.
7120 // Avoid null check if we know obj is not null.
7121 if (instruction->MustDoNullCheck()) {
7122 __ testl(obj, obj);
7123 __ j(kEqual, &zero);
7124 }
7125
7126 switch (type_check_kind) {
7127 case TypeCheckKind::kExactCheck: {
7128 ReadBarrierOption read_barrier_option =
7129 codegen_->ReadBarrierOptionForInstanceOf(instruction);
7130 // /* HeapReference<Class> */ out = obj->klass_
7131 GenerateReferenceLoadTwoRegisters(instruction,
7132 out_loc,
7133 obj_loc,
7134 class_offset,
7135 read_barrier_option);
7136 if (cls.IsRegister()) {
7137 __ cmpl(out, cls.AsRegister<CpuRegister>());
7138 } else {
7139 DCHECK(cls.IsStackSlot()) << cls;
7140 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
7141 }
7142 if (zero.IsLinked()) {
7143 // Classes must be equal for the instanceof to succeed.
7144 __ j(kNotEqual, &zero);
7145 __ movl(out, Immediate(1));
7146 __ jmp(&done);
7147 } else {
7148 __ setcc(kEqual, out);
7149 // setcc only sets the low byte.
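        // The upper bits of `out` still hold part of the class reference loaded above.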
7150 __ andl(out, Immediate(1));
7151 }
7152 break;
7153 }
7154
7155 case TypeCheckKind::kAbstractClassCheck: {
7156 ReadBarrierOption read_barrier_option =
7157 codegen_->ReadBarrierOptionForInstanceOf(instruction);
7158 // /* HeapReference<Class> */ out = obj->klass_
7159 GenerateReferenceLoadTwoRegisters(instruction,
7160 out_loc,
7161 obj_loc,
7162 class_offset,
7163 read_barrier_option);
7164 // If the class is abstract, we eagerly fetch the super class of the
7165 // object to avoid doing a comparison we know will fail.
7166 NearLabel loop, success;
7167 __ Bind(&loop);
7168 // /* HeapReference<Class> */ out = out->super_class_
7169 GenerateReferenceLoadOneRegister(instruction,
7170 out_loc,
7171 super_offset,
7172 maybe_temp_loc,
7173 read_barrier_option);
7174 __ testl(out, out);
7175 // If `out` is null, we use it for the result, and jump to `done`.
7176 __ j(kEqual, &done);
7177 if (cls.IsRegister()) {
7178 __ cmpl(out, cls.AsRegister<CpuRegister>());
7179 } else {
7180 DCHECK(cls.IsStackSlot()) << cls;
7181 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
7182 }
7183 __ j(kNotEqual, &loop);
7184 __ movl(out, Immediate(1));
7185 if (zero.IsLinked()) {
7186 __ jmp(&done);
7187 }
7188 break;
7189 }
7190
7191 case TypeCheckKind::kClassHierarchyCheck: {
7192 ReadBarrierOption read_barrier_option =
7193 codegen_->ReadBarrierOptionForInstanceOf(instruction);
7194 // /* HeapReference<Class> */ out = obj->klass_
7195 GenerateReferenceLoadTwoRegisters(instruction,
7196 out_loc,
7197 obj_loc,
7198 class_offset,
7199 read_barrier_option);
7200 // Walk over the class hierarchy to find a match.
7201 NearLabel loop, success;
7202 __ Bind(&loop);
7203 if (cls.IsRegister()) {
7204 __ cmpl(out, cls.AsRegister<CpuRegister>());
7205 } else {
7206 DCHECK(cls.IsStackSlot()) << cls;
7207 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
7208 }
7209 __ j(kEqual, &success);
7210 // /* HeapReference<Class> */ out = out->super_class_
7211 GenerateReferenceLoadOneRegister(instruction,
7212 out_loc,
7213 super_offset,
7214 maybe_temp_loc,
7215 read_barrier_option);
7216 __ testl(out, out);
7217 __ j(kNotEqual, &loop);
7218 // If `out` is null, we use it for the result, and jump to `done`.
7219 __ jmp(&done);
7220 __ Bind(&success);
7221 __ movl(out, Immediate(1));
7222 if (zero.IsLinked()) {
7223 __ jmp(&done);
7224 }
7225 break;
7226 }
7227
7228 case TypeCheckKind::kArrayObjectCheck: {
7229 ReadBarrierOption read_barrier_option =
7230 codegen_->ReadBarrierOptionForInstanceOf(instruction);
7231 // /* HeapReference<Class> */ out = obj->klass_
7232 GenerateReferenceLoadTwoRegisters(instruction,
7233 out_loc,
7234 obj_loc,
7235 class_offset,
7236 read_barrier_option);
7237 // Do an exact check.
7238 NearLabel exact_check;
7239 if (cls.IsRegister()) {
7240 __ cmpl(out, cls.AsRegister<CpuRegister>());
7241 } else {
7242 DCHECK(cls.IsStackSlot()) << cls;
7243 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
7244 }
7245 __ j(kEqual, &exact_check);
7246 // Otherwise, we need to check that the object's class is a non-primitive array.
7247 // /* HeapReference<Class> */ out = out->component_type_
7248 GenerateReferenceLoadOneRegister(instruction,
7249 out_loc,
7250 component_offset,
7251 maybe_temp_loc,
7252 read_barrier_option);
7253 __ testl(out, out);
7254 // If `out` is null, we use it for the result, and jump to `done`.
7255 __ j(kEqual, &done);
7256 __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot));
7257 __ j(kNotEqual, &zero);
7258 __ Bind(&exact_check);
7259 __ movl(out, Immediate(1));
7260 __ jmp(&done);
7261 break;
7262 }
7263
7264 case TypeCheckKind::kArrayCheck: {
7265 // No read barrier since the slow path will retry upon failure.
7266 // /* HeapReference<Class> */ out = obj->klass_
7267 GenerateReferenceLoadTwoRegisters(instruction,
7268 out_loc,
7269 obj_loc,
7270 class_offset,
7271 kWithoutReadBarrier);
7272 if (cls.IsRegister()) {
7273 __ cmpl(out, cls.AsRegister<CpuRegister>());
7274 } else {
7275 DCHECK(cls.IsStackSlot()) << cls;
7276 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
7277 }
7278 DCHECK(locations->OnlyCallsOnSlowPath());
7279 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
7280 instruction, /* is_fatal= */ false);
7281 codegen_->AddSlowPath(slow_path);
7282 __ j(kNotEqual, slow_path->GetEntryLabel());
7283 __ movl(out, Immediate(1));
7284 if (zero.IsLinked()) {
7285 __ jmp(&done);
7286 }
7287 break;
7288 }
7289
7290 case TypeCheckKind::kInterfaceCheck: {
7291 if (codegen_->InstanceOfNeedsReadBarrier(instruction)) {
7292 DCHECK(locations->OnlyCallsOnSlowPath());
7293 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
7294 instruction, /* is_fatal= */ false);
7295 codegen_->AddSlowPath(slow_path);
7296 if (codegen_->EmitNonBakerReadBarrier()) {
7297 __ jmp(slow_path->GetEntryLabel());
7298 break;
7299 }
7300 // For Baker read barrier, take the slow path while marking.
7301 __ gs()->cmpl(
7302 Address::Absolute(Thread::IsGcMarkingOffset<kX86_64PointerSize>(), /* no_rip= */ true),
7303 Immediate(0));
7304 __ j(kNotEqual, slow_path->GetEntryLabel());
7305 }
7306
7307 // Fast-path without read barriers.
7308 CpuRegister temp = maybe_temp_loc.AsRegister<CpuRegister>();
7309 // /* HeapReference<Class> */ temp = obj->klass_
7310 __ movl(temp, Address(obj, class_offset));
7311 __ MaybeUnpoisonHeapReference(temp);
7312 // /* HeapReference<Class> */ temp = temp->iftable_
7313 __ movl(temp, Address(temp, iftable_offset));
7314 __ MaybeUnpoisonHeapReference(temp);
7315 // Load the size of the `IfTable`. The `Class::iftable_` is never null.
7316 __ movl(out, Address(temp, array_length_offset));
7317 // Maybe poison the `cls` for direct comparison with memory.
7318 __ MaybePoisonHeapReference(cls.AsRegister<CpuRegister>());
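      // Each IfTable entry holds two references (the interface class and its method array),
      // so the length is twice the interface count and the loop below steps by two.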
7319 // Loop through the iftable and check if any class matches.
7320 NearLabel loop, end;
7321 __ Bind(&loop);
7322 // Check if we still have an entry to compare.
7323 __ subl(out, Immediate(2));
7324 __ j(kNegative, (zero.IsLinked() && !kPoisonHeapReferences) ? &zero : &end);
7325 // Go to next interface if the classes do not match.
7326 __ cmpl(cls.AsRegister<CpuRegister>(),
7327 CodeGeneratorX86_64::ArrayAddress(temp, out_loc, TIMES_4, object_array_data_offset));
7328 __ j(kNotEqual, &loop);
7329 if (zero.IsLinked()) {
7330 __ movl(out, Immediate(1));
7331 // If `cls` was poisoned above, unpoison it.
7332 __ MaybeUnpoisonHeapReference(cls.AsRegister<CpuRegister>());
7333 __ jmp(&done);
7334 if (kPoisonHeapReferences) {
7335 // The false case needs to unpoison the class before jumping to `zero`.
7336 __ Bind(&end);
7337 __ UnpoisonHeapReference(cls.AsRegister<CpuRegister>());
7338 __ jmp(&zero);
7339 }
7340 } else {
7341 // To reduce branching, use the fact that the false case branches with a `-2` in `out`.
7342 __ movl(out, Immediate(-1));
7343 __ Bind(&end);
7344 __ addl(out, Immediate(2));
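        // A match falls through with -1 in `out` (yielding 1 after the add); an exhausted
        // loop jumps to `end` with -2 (yielding 0).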
7345 // If `cls` was poisoned above, unpoison it.
7346 __ MaybeUnpoisonHeapReference(cls.AsRegister<CpuRegister>());
7347 }
7348 break;
7349 }
7350
7351 case TypeCheckKind::kUnresolvedCheck: {
7352 // Note that we indeed only call on slow path, but we always go
7353 // into the slow path for the unresolved check case.
7354 //
7355 // We cannot directly call the InstanceofNonTrivial runtime
7356 // entry point without resorting to a type checking slow path
7357 // here (i.e. by calling InvokeRuntime directly), as it would
7358 // require to assign fixed registers for the inputs of this
      // require assigning fixed registers for the inputs of this
7360 // convention), which might be cluttered by the potential first
7361 // read barrier emission at the beginning of this method.
7362 //
7363 // TODO: Introduce a new runtime entry point taking the object
7364 // to test (instead of its class) as argument, and let it deal
7365 // with the read barrier issues. This will let us refactor this
7366 // case of the `switch` code as it was previously (with a direct
7367 // call to the runtime not using a type checking slow path).
7368 // This should also be beneficial for the other cases above.
7369 DCHECK(locations->OnlyCallsOnSlowPath());
7370 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
7371 instruction, /* is_fatal= */ false);
7372 codegen_->AddSlowPath(slow_path);
7373 __ jmp(slow_path->GetEntryLabel());
7374 break;
7375 }
7376
7377 case TypeCheckKind::kBitstringCheck: {
7378 // /* HeapReference<Class> */ temp = obj->klass_
7379 GenerateReferenceLoadTwoRegisters(instruction,
7380 out_loc,
7381 obj_loc,
7382 class_offset,
7383 kWithoutReadBarrier);
7384
7385 GenerateBitstringTypeCheckCompare(instruction, out);
7386 if (zero.IsLinked()) {
7387 __ j(kNotEqual, &zero);
7388 __ movl(out, Immediate(1));
7389 __ jmp(&done);
7390 } else {
7391 __ setcc(kEqual, out);
7392 // setcc only sets the low byte.
7393 __ andl(out, Immediate(1));
7394 }
7395 break;
7396 }
7397 }
7398
7399 if (zero.IsLinked()) {
7400 __ Bind(&zero);
7401 __ xorl(out, out);
7402 }
7403
7404 if (done.IsLinked()) {
7405 __ Bind(&done);
7406 }
7407
7408 if (slow_path != nullptr) {
7409 __ Bind(slow_path->GetExitLabel());
7410 }
7411 }
7412
void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) {
7414 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7415 LocationSummary::CallKind call_kind = codegen_->GetCheckCastCallKind(instruction);
7416 LocationSummary* locations =
7417 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
7418 locations->SetInAt(0, Location::RequiresRegister());
7419 if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
7420 // Require a register for the interface check since there is a loop that compares the class to
7421 // a memory address.
7422 locations->SetInAt(1, Location::RequiresRegister());
7423 } else if (type_check_kind == TypeCheckKind::kBitstringCheck) {
7424 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
7425 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
7426 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
7427 } else {
7428 locations->SetInAt(1, Location::Any());
7429 }
7430 locations->AddRegisterTemps(NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind));
7431 }
7432
void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
7434 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7435 LocationSummary* locations = instruction->GetLocations();
7436 Location obj_loc = locations->InAt(0);
7437 CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
7438 Location cls = locations->InAt(1);
7439 Location temp_loc = locations->GetTemp(0);
7440 CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
7441 const size_t num_temps = NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind);
7442 DCHECK_GE(num_temps, 1u);
7443 DCHECK_LE(num_temps, 2u);
7444 Location maybe_temp2_loc = (num_temps >= 2u) ? locations->GetTemp(1) : Location::NoLocation();
7445 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
7446 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
7447 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
7448 const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
7449 const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
7450 const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
7451 const uint32_t object_array_data_offset =
7452 mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
7453
7454 bool is_type_check_slow_path_fatal = codegen_->IsTypeCheckSlowPathFatal(instruction);
7455 SlowPathCode* type_check_slow_path =
7456 new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
7457 instruction, is_type_check_slow_path_fatal);
7458 codegen_->AddSlowPath(type_check_slow_path);
7459
7460
7461 NearLabel done;
7462 // Avoid null check if we know obj is not null.
7463 if (instruction->MustDoNullCheck()) {
7464 __ testl(obj, obj);
7465 __ j(kEqual, &done);
7466 }
7467
7468 switch (type_check_kind) {
7469 case TypeCheckKind::kExactCheck:
7470 case TypeCheckKind::kArrayCheck: {
7471 // /* HeapReference<Class> */ temp = obj->klass_
7472 GenerateReferenceLoadTwoRegisters(instruction,
7473 temp_loc,
7474 obj_loc,
7475 class_offset,
7476 kWithoutReadBarrier);
7477 if (cls.IsRegister()) {
7478 __ cmpl(temp, cls.AsRegister<CpuRegister>());
7479 } else {
7480 DCHECK(cls.IsStackSlot()) << cls;
7481 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
7482 }
7483 // Jump to slow path for throwing the exception or doing a
7484 // more involved array check.
7485 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
7486 break;
7487 }
7488
7489 case TypeCheckKind::kAbstractClassCheck: {
7490 // /* HeapReference<Class> */ temp = obj->klass_
7491 GenerateReferenceLoadTwoRegisters(instruction,
7492 temp_loc,
7493 obj_loc,
7494 class_offset,
7495 kWithoutReadBarrier);
7496 // If the class is abstract, we eagerly fetch the super class of the
7497 // object to avoid doing a comparison we know will fail.
7498 NearLabel loop;
7499 __ Bind(&loop);
7500 // /* HeapReference<Class> */ temp = temp->super_class_
7501 GenerateReferenceLoadOneRegister(instruction,
7502 temp_loc,
7503 super_offset,
7504 maybe_temp2_loc,
7505 kWithoutReadBarrier);
7506
7507 // If the class reference currently in `temp` is null, jump to the slow path to throw the
7508 // exception.
7509 __ testl(temp, temp);
7510 // Otherwise, compare the classes.
7511 __ j(kZero, type_check_slow_path->GetEntryLabel());
7512 if (cls.IsRegister()) {
7513 __ cmpl(temp, cls.AsRegister<CpuRegister>());
7514 } else {
7515 DCHECK(cls.IsStackSlot()) << cls;
7516 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
7517 }
7518 __ j(kNotEqual, &loop);
7519 break;
7520 }
7521
7522 case TypeCheckKind::kClassHierarchyCheck: {
7523 // /* HeapReference<Class> */ temp = obj->klass_
7524 GenerateReferenceLoadTwoRegisters(instruction,
7525 temp_loc,
7526 obj_loc,
7527 class_offset,
7528 kWithoutReadBarrier);
7529 // Walk over the class hierarchy to find a match.
7530 NearLabel loop;
7531 __ Bind(&loop);
7532 if (cls.IsRegister()) {
7533 __ cmpl(temp, cls.AsRegister<CpuRegister>());
7534 } else {
7535 DCHECK(cls.IsStackSlot()) << cls;
7536 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
7537 }
7538 __ j(kEqual, &done);
7539
7540 // /* HeapReference<Class> */ temp = temp->super_class_
7541 GenerateReferenceLoadOneRegister(instruction,
7542 temp_loc,
7543 super_offset,
7544 maybe_temp2_loc,
7545 kWithoutReadBarrier);
7546
7547 // If the class reference currently in `temp` is not null, jump
7548 // back at the beginning of the loop.
7549 __ testl(temp, temp);
7550 __ j(kNotZero, &loop);
7551 // Otherwise, jump to the slow path to throw the exception.
7552 __ jmp(type_check_slow_path->GetEntryLabel());
7553 break;
7554 }
7555
7556 case TypeCheckKind::kArrayObjectCheck: {
7557 // /* HeapReference<Class> */ temp = obj->klass_
7558 GenerateReferenceLoadTwoRegisters(instruction,
7559 temp_loc,
7560 obj_loc,
7561 class_offset,
7562 kWithoutReadBarrier);
7563 // Do an exact check.
7564 NearLabel check_non_primitive_component_type;
7565 if (cls.IsRegister()) {
7566 __ cmpl(temp, cls.AsRegister<CpuRegister>());
7567 } else {
7568 DCHECK(cls.IsStackSlot()) << cls;
7569 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
7570 }
7571 __ j(kEqual, &done);
7572
7573 // Otherwise, we need to check that the object's class is a non-primitive array.
7574 // /* HeapReference<Class> */ temp = temp->component_type_
7575 GenerateReferenceLoadOneRegister(instruction,
7576 temp_loc,
7577 component_offset,
7578 maybe_temp2_loc,
7579 kWithoutReadBarrier);
7580
7581 // If the component type is not null (i.e. the object is indeed
7582 // an array), jump to label `check_non_primitive_component_type`
7583 // to further check that this component type is not a primitive
7584 // type.
7585 __ testl(temp, temp);
7586 // Otherwise, jump to the slow path to throw the exception.
7587 __ j(kZero, type_check_slow_path->GetEntryLabel());
7588 __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
7589 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
7590 break;
7591 }
7592
7593 case TypeCheckKind::kUnresolvedCheck: {
7594 // We always go into the type check slow path for the unresolved case.
7595 //
7596 // We cannot directly call the CheckCast runtime entry point
7597 // without resorting to a type checking slow path here (i.e. by
      // calling InvokeRuntime directly), as it would require assigning
      // fixed registers for the inputs of this HInstanceOf
7600 // instruction (following the runtime calling convention), which
7601 // might be cluttered by the potential first read barrier
7602 // emission at the beginning of this method.
7603 __ jmp(type_check_slow_path->GetEntryLabel());
7604 break;
7605 }
7606
7607 case TypeCheckKind::kInterfaceCheck: {
7608 // Fast path for the interface check. Try to avoid read barriers to improve the fast path.
      // We cannot get false positives by doing this.
7610 // /* HeapReference<Class> */ temp = obj->klass_
7611 GenerateReferenceLoadTwoRegisters(instruction,
7612 temp_loc,
7613 obj_loc,
7614 class_offset,
7615 kWithoutReadBarrier);
7616
7617 // /* HeapReference<Class> */ temp = temp->iftable_
7618 GenerateReferenceLoadOneRegister(instruction,
7619 temp_loc,
7620 iftable_offset,
7621 maybe_temp2_loc,
7622 kWithoutReadBarrier);
7623 // Load the size of the `IfTable`. The `Class::iftable_` is never null.
7624 __ movl(maybe_temp2_loc.AsRegister<CpuRegister>(), Address(temp, array_length_offset));
7625 // Maybe poison the `cls` for direct comparison with memory.
7626 __ MaybePoisonHeapReference(cls.AsRegister<CpuRegister>());
7627 // Loop through the iftable and check if any class matches.
7628 NearLabel start_loop;
7629 __ Bind(&start_loop);
7630 // Check if we still have an entry to compare.
7631 __ subl(maybe_temp2_loc.AsRegister<CpuRegister>(), Immediate(2));
7632 __ j(kNegative, type_check_slow_path->GetEntryLabel());
7633 // Go to next interface if the classes do not match.
7634 __ cmpl(cls.AsRegister<CpuRegister>(),
7635 CodeGeneratorX86_64::ArrayAddress(temp,
7636 maybe_temp2_loc,
7637 TIMES_4,
7638 object_array_data_offset));
      __ j(kNotEqual, &start_loop);  // Fall through (success) if the classes match.
7640 // If `cls` was poisoned above, unpoison it.
7641 __ MaybeUnpoisonHeapReference(cls.AsRegister<CpuRegister>());
7642 break;
7643 }
7644
7645 case TypeCheckKind::kBitstringCheck: {
7646 // /* HeapReference<Class> */ temp = obj->klass_
7647 GenerateReferenceLoadTwoRegisters(instruction,
7648 temp_loc,
7649 obj_loc,
7650 class_offset,
7651 kWithoutReadBarrier);
7652
7653 GenerateBitstringTypeCheckCompare(instruction, temp);
7654 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
7655 break;
7656 }
7657 }
7658
7659 if (done.IsLinked()) {
7660 __ Bind(&done);
7661 }
7662
7663 __ Bind(type_check_slow_path->GetExitLabel());
7664 }
7665
void LocationsBuilderX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
7667 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
7668 instruction, LocationSummary::kCallOnMainOnly);
7669 InvokeRuntimeCallingConvention calling_convention;
7670 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
7671 }
7672
void InstructionCodeGeneratorX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
7674 codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
7675 instruction,
7676 instruction->GetDexPc());
7677 if (instruction->IsEnter()) {
7678 CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
7679 } else {
7680 CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
7681 }
7682 }
7683
void LocationsBuilderX86_64::VisitX86AndNot(HX86AndNot* instruction) {
7685 DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
7686 DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType();
7687 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
7688 locations->SetInAt(0, Location::RequiresRegister());
7689 // There is no immediate variant of negated bitwise and in X86.
7690 locations->SetInAt(1, Location::RequiresRegister());
7691 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
7692 }
7693
void LocationsBuilderX86_64::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) {
7695 DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
7696 DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType();
7697 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
7698 locations->SetInAt(0, Location::RequiresRegister());
7699 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
7700 }
7701
void InstructionCodeGeneratorX86_64::VisitX86AndNot(HX86AndNot* instruction) {
7703 LocationSummary* locations = instruction->GetLocations();
7704 Location first = locations->InAt(0);
7705 Location second = locations->InAt(1);
7706 Location dest = locations->Out();
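  // ANDN (BMI1) computes (~src1) & src2 in a single instruction.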
7707 __ andn(dest.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
7708 }
7709
void InstructionCodeGeneratorX86_64::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) {
7711 LocationSummary* locations = instruction->GetLocations();
7712 Location src = locations->InAt(0);
7713 Location dest = locations->Out();
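  // BLSR computes src & (src - 1), clearing the lowest set bit; BLSMSK computes
  // src ^ (src - 1), a mask up to and including the lowest set bit (both BMI1).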
7714 switch (instruction->GetOpKind()) {
7715 case HInstruction::kAnd:
7716 __ blsr(dest.AsRegister<CpuRegister>(), src.AsRegister<CpuRegister>());
7717 break;
7718 case HInstruction::kXor:
7719 __ blsmsk(dest.AsRegister<CpuRegister>(), src.AsRegister<CpuRegister>());
7720 break;
7721 default:
7722 LOG(FATAL) << "Unreachable";
7723 }
7724 }
7725
void LocationsBuilderX86_64::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
void LocationsBuilderX86_64::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); }
void LocationsBuilderX86_64::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); }
7729
void LocationsBuilderX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
7731 LocationSummary* locations =
7732 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
7733 DCHECK(instruction->GetResultType() == DataType::Type::kInt32
7734 || instruction->GetResultType() == DataType::Type::kInt64);
7735 locations->SetInAt(0, Location::RequiresRegister());
7736 locations->SetInAt(1, Location::Any());
7737 locations->SetOut(Location::SameAsFirstInput());
7738 }
7739
void InstructionCodeGeneratorX86_64::VisitAnd(HAnd* instruction) {
7741 HandleBitwiseOperation(instruction);
7742 }
7743
void InstructionCodeGeneratorX86_64::VisitOr(HOr* instruction) {
7745 HandleBitwiseOperation(instruction);
7746 }
7747
void InstructionCodeGeneratorX86_64::VisitXor(HXor* instruction) {
7749 HandleBitwiseOperation(instruction);
7750 }
7751
void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
7753 LocationSummary* locations = instruction->GetLocations();
7754 Location first = locations->InAt(0);
7755 Location second = locations->InAt(1);
7756 DCHECK(first.Equals(locations->Out()));
7757
7758 if (instruction->GetResultType() == DataType::Type::kInt32) {
7759 if (second.IsRegister()) {
7760 if (instruction->IsAnd()) {
7761 __ andl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
7762 } else if (instruction->IsOr()) {
7763 __ orl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
7764 } else {
7765 DCHECK(instruction->IsXor());
7766 __ xorl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
7767 }
7768 } else if (second.IsConstant()) {
7769 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
7770 if (instruction->IsAnd()) {
7771 __ andl(first.AsRegister<CpuRegister>(), imm);
7772 } else if (instruction->IsOr()) {
7773 __ orl(first.AsRegister<CpuRegister>(), imm);
7774 } else {
7775 DCHECK(instruction->IsXor());
7776 __ xorl(first.AsRegister<CpuRegister>(), imm);
7777 }
7778 } else {
7779 Address address(CpuRegister(RSP), second.GetStackIndex());
7780 if (instruction->IsAnd()) {
7781 __ andl(first.AsRegister<CpuRegister>(), address);
7782 } else if (instruction->IsOr()) {
7783 __ orl(first.AsRegister<CpuRegister>(), address);
7784 } else {
7785 DCHECK(instruction->IsXor());
7786 __ xorl(first.AsRegister<CpuRegister>(), address);
7787 }
7788 }
7789 } else {
7790 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
7791 CpuRegister first_reg = first.AsRegister<CpuRegister>();
7792 bool second_is_constant = false;
7793 int64_t value = 0;
7794 if (second.IsConstant()) {
7795 second_is_constant = true;
7796 value = second.GetConstant()->AsLongConstant()->GetValue();
7797 }
7798 bool is_int32_value = IsInt<32>(value);
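    // Only sign-extended 32-bit immediates can be encoded directly; larger constants are
    // loaded from the constant area via LiteralInt64Address.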
7799
7800 if (instruction->IsAnd()) {
7801 if (second_is_constant) {
7802 if (is_int32_value) {
7803 __ andq(first_reg, Immediate(static_cast<int32_t>(value)));
7804 } else {
7805 __ andq(first_reg, codegen_->LiteralInt64Address(value));
7806 }
7807 } else if (second.IsDoubleStackSlot()) {
7808 __ andq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
7809 } else {
7810 __ andq(first_reg, second.AsRegister<CpuRegister>());
7811 }
7812 } else if (instruction->IsOr()) {
7813 if (second_is_constant) {
7814 if (is_int32_value) {
7815 __ orq(first_reg, Immediate(static_cast<int32_t>(value)));
7816 } else {
7817 __ orq(first_reg, codegen_->LiteralInt64Address(value));
7818 }
7819 } else if (second.IsDoubleStackSlot()) {
7820 __ orq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
7821 } else {
7822 __ orq(first_reg, second.AsRegister<CpuRegister>());
7823 }
7824 } else {
7825 DCHECK(instruction->IsXor());
7826 if (second_is_constant) {
7827 if (is_int32_value) {
7828 __ xorq(first_reg, Immediate(static_cast<int32_t>(value)));
7829 } else {
7830 __ xorq(first_reg, codegen_->LiteralInt64Address(value));
7831 }
7832 } else if (second.IsDoubleStackSlot()) {
7833 __ xorq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
7834 } else {
7835 __ xorq(first_reg, second.AsRegister<CpuRegister>());
7836 }
7837 }
7838 }
7839 }
7840
void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(
    HInstruction* instruction,
    Location out,
    uint32_t offset,
    Location maybe_temp,
    ReadBarrierOption read_barrier_option) {
7847 CpuRegister out_reg = out.AsRegister<CpuRegister>();
7848 if (read_barrier_option == kWithReadBarrier) {
7849 DCHECK(codegen_->EmitReadBarrier());
7850 if (kUseBakerReadBarrier) {
7851 // Load with fast path based Baker's read barrier.
7852 // /* HeapReference<Object> */ out = *(out + offset)
7853 codegen_->GenerateFieldLoadWithBakerReadBarrier(
7854 instruction, out, out_reg, offset, /* needs_null_check= */ false);
7855 } else {
7856 // Load with slow path based read barrier.
7857 // Save the value of `out` into `maybe_temp` before overwriting it
7858 // in the following move operation, as we will need it for the
7859 // read barrier below.
7860 DCHECK(maybe_temp.IsRegister()) << maybe_temp;
7861 __ movl(maybe_temp.AsRegister<CpuRegister>(), out_reg);
7862 // /* HeapReference<Object> */ out = *(out + offset)
7863 __ movl(out_reg, Address(out_reg, offset));
7864 codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
7865 }
7866 } else {
7867 // Plain load with no read barrier.
7868 // /* HeapReference<Object> */ out = *(out + offset)
7869 __ movl(out_reg, Address(out_reg, offset));
7870 __ MaybeUnpoisonHeapReference(out_reg);
7871 }
7872 }
7873
void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(
    HInstruction* instruction,
    Location out,
    Location obj,
    uint32_t offset,
    ReadBarrierOption read_barrier_option) {
7880 CpuRegister out_reg = out.AsRegister<CpuRegister>();
7881 CpuRegister obj_reg = obj.AsRegister<CpuRegister>();
7882 if (read_barrier_option == kWithReadBarrier) {
7883 DCHECK(codegen_->EmitReadBarrier());
7884 if (kUseBakerReadBarrier) {
7885 // Load with fast path based Baker's read barrier.
7886 // /* HeapReference<Object> */ out = *(obj + offset)
7887 codegen_->GenerateFieldLoadWithBakerReadBarrier(
7888 instruction, out, obj_reg, offset, /* needs_null_check= */ false);
7889 } else {
7890 // Load with slow path based read barrier.
7891 // /* HeapReference<Object> */ out = *(obj + offset)
7892 __ movl(out_reg, Address(obj_reg, offset));
7893 codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
7894 }
7895 } else {
7896 // Plain load with no read barrier.
7897 // /* HeapReference<Object> */ out = *(obj + offset)
7898 __ movl(out_reg, Address(obj_reg, offset));
7899 __ MaybeUnpoisonHeapReference(out_reg);
7900 }
7901 }
7902
void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(
    HInstruction* instruction,
    Location root,
    const Address& address,
    Label* fixup_label,
    ReadBarrierOption read_barrier_option) {
7909 CpuRegister root_reg = root.AsRegister<CpuRegister>();
7910 if (read_barrier_option == kWithReadBarrier) {
7911 DCHECK(codegen_->EmitReadBarrier());
7912 if (kUseBakerReadBarrier) {
7913 // Fast path implementation of art::ReadBarrier::BarrierForRoot when
      // Baker's read barriers are used:
7915 //
7916 // root = obj.field;
7917 // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
7918 // if (temp != null) {
7919 // root = temp(root)
7920 // }
7921
7922 // /* GcRoot<mirror::Object> */ root = *address
7923 __ movl(root_reg, address);
7924 if (fixup_label != nullptr) {
7925 __ Bind(fixup_label);
7926 }
7927 static_assert(
7928 sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
7929 "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
7930 "have different sizes.");
7931 static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
7932 "art::mirror::CompressedReference<mirror::Object> and int32_t "
7933 "have different sizes.");
7934
7935 // Slow path marking the GC root `root`.
7936 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathX86_64(
7937 instruction, root, /* unpoison_ref_before_marking= */ false);
7938 codegen_->AddSlowPath(slow_path);
7939
7940 // Test the `Thread::Current()->pReadBarrierMarkReg ## root.reg()` entrypoint.
7941 const int32_t entry_point_offset =
7942 Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(root.reg());
7943 __ gs()->cmpl(Address::Absolute(entry_point_offset, /* no_rip= */ true), Immediate(0));
7944 // The entrypoint is null when the GC is not marking.
7945 __ j(kNotEqual, slow_path->GetEntryLabel());
7946 __ Bind(slow_path->GetExitLabel());
7947 } else {
7948 // GC root loaded through a slow path for read barriers other
7949 // than Baker's.
7950 // /* GcRoot<mirror::Object>* */ root = address
7951 __ leaq(root_reg, address);
7952 if (fixup_label != nullptr) {
7953 __ Bind(fixup_label);
7954 }
7955 // /* mirror::Object* */ root = root->Read()
7956 codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
7957 }
7958 } else {
7959 // Plain GC root load with no read barrier.
7960 // /* GcRoot<mirror::Object> */ root = *address
7961 __ movl(root_reg, address);
7962 if (fixup_label != nullptr) {
7963 __ Bind(fixup_label);
7964 }
7965 // Note that GC roots are not affected by heap poisoning, thus we
7966 // do not have to unpoison `root_reg` here.
7967 }
7968 }
7969
void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
                                                                Location ref,
                                                                CpuRegister obj,
                                                                uint32_t offset,
                                                                bool needs_null_check) {
7975 DCHECK(EmitBakerReadBarrier());
7976
7977 // /* HeapReference<Object> */ ref = *(obj + offset)
7978 Address src(obj, offset);
7979 GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
7980 }
7981
void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
                                                                Location ref,
                                                                CpuRegister obj,
                                                                uint32_t data_offset,
                                                                Location index,
                                                                bool needs_null_check) {
7988 DCHECK(EmitBakerReadBarrier());
7989
7990 static_assert(
7991 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
7992 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
7993 // /* HeapReference<Object> */ ref =
7994 // *(obj + data_offset + index * sizeof(HeapReference<Object>))
7995 Address src = CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset);
7996 GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
7997 }
7998
void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
                                                                    Location ref,
                                                                    CpuRegister obj,
                                                                    const Address& src,
                                                                    bool needs_null_check,
                                                                    bool always_update_field,
                                                                    CpuRegister* temp1,
                                                                    CpuRegister* temp2) {
8007 DCHECK(EmitBakerReadBarrier());
8008
8009 // In slow path based read barriers, the read barrier call is
8010 // inserted after the original load. However, in fast path based
8011 // Baker's read barriers, we need to perform the load of
8012 // mirror::Object::monitor_ *before* the original reference load.
8013 // This load-load ordering is required by the read barrier.
8014 // The fast path/slow path (for Baker's algorithm) should look like:
8015 //
8016 // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
8017 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
8018 // HeapReference<Object> ref = *src; // Original reference load.
8019 // bool is_gray = (rb_state == ReadBarrier::GrayState());
8020 // if (is_gray) {
8021 // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path.
8022 // }
8023 //
8024 // Note: the original implementation in ReadBarrier::Barrier is
8025 // slightly more complex as:
8026 // - it implements the load-load fence using a data dependency on
8027 // the high-bits of rb_state, which are expected to be all zeroes
8028 // (we use CodeGeneratorX86_64::GenerateMemoryBarrier instead
8029 // here, which is a no-op thanks to the x86-64 memory model);
8030 // - it performs additional checks that we do not do here for
8031 // performance reasons.
8032
8033 CpuRegister ref_reg = ref.AsRegister<CpuRegister>();
8034 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
8035
8036 // Given the numeric representation, it's enough to check the low bit of the rb_state.
8037 static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
8038 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
8039 constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
8040 constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
8041 constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
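  // The int8_t cast keeps the value representable as a signed byte immediate for the
  // byte-sized testb below.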
8042
8043 // if (rb_state == ReadBarrier::GrayState())
8044 // ref = ReadBarrier::Mark(ref);
8045 // At this point, just do the "if" and make sure that flags are preserved until the branch.
8046 __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
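  // The testb above is the first access through `obj`, so it can double as the implicit
  // null check recorded below.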
8047 if (needs_null_check) {
8048 MaybeRecordImplicitNullCheck(instruction);
8049 }
8050
8051 // Load fence to prevent load-load reordering.
8052 // Note that this is a no-op, thanks to the x86-64 memory model.
8053 GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
8054
8055 // The actual reference load.
8056 // /* HeapReference<Object> */ ref = *src
8057 __ movl(ref_reg, src); // Flags are unaffected.
8058
8059 // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch.
8060 // Slow path marking the object `ref` when it is gray.
8061 SlowPathCode* slow_path;
8062 if (always_update_field) {
8063 DCHECK(temp1 != nullptr);
8064 DCHECK(temp2 != nullptr);
8065 slow_path = new (GetScopedAllocator()) ReadBarrierMarkAndUpdateFieldSlowPathX86_64(
8066 instruction, ref, obj, src, /* unpoison_ref_before_marking= */ true, *temp1, *temp2);
8067 } else {
8068 slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathX86_64(
8069 instruction, ref, /* unpoison_ref_before_marking= */ true);
8070 }
8071 AddSlowPath(slow_path);
8072
8073 // We have done the "if" of the gray bit check above, now branch based on the flags.
8074 __ j(kNotZero, slow_path->GetEntryLabel());
8075
8076 // Object* ref = ref_addr->AsMirrorPtr()
8077 __ MaybeUnpoisonHeapReference(ref_reg);
8078
8079 __ Bind(slow_path->GetExitLabel());
8080 }
8081
void CodeGeneratorX86_64::GenerateReadBarrierSlow(HInstruction* instruction,
                                                  Location out,
                                                  Location ref,
                                                  Location obj,
                                                  uint32_t offset,
                                                  Location index) {
8088 DCHECK(EmitReadBarrier());
8089
8090 // Insert a slow path based read barrier *after* the reference load.
8091 //
8092 // If heap poisoning is enabled, the unpoisoning of the loaded
8093 // reference will be carried out by the runtime within the slow
8094 // path.
8095 //
8096 // Note that `ref` currently does not get unpoisoned (when heap
8097 // poisoning is enabled), which is alright as the `ref` argument is
8098 // not used by the artReadBarrierSlow entry point.
8099 //
8100 // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
8101 SlowPathCode* slow_path = new (GetScopedAllocator())
8102 ReadBarrierForHeapReferenceSlowPathX86_64(instruction, out, ref, obj, offset, index);
8103 AddSlowPath(slow_path);
8104
8105 __ jmp(slow_path->GetEntryLabel());
8106 __ Bind(slow_path->GetExitLabel());
8107 }
8108
8109 void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
8110 Location out,
8111 Location ref,
8112 Location obj,
8113 uint32_t offset,
8114 Location index) {
8115 if (EmitReadBarrier()) {
8116 // Baker's read barriers shall be handled by the fast path
8117 // (CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier).
8118 DCHECK(!kUseBakerReadBarrier);
8119 // If heap poisoning is enabled, unpoisoning will be taken care of
8120 // by the runtime within the slow path.
8121 GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
8122 } else if (kPoisonHeapReferences) {
8123 __ UnpoisonHeapReference(out.AsRegister<CpuRegister>());
8124 }
8125 }
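// A minimal sketch of how a reference field load might use the helper above (hypothetical call;
// `out_loc`, `base_loc` and `field_offset` are placeholder names, not code from this file):
//
//   // movl out, [base + field_offset] has just been emitted for a reference field load.
//   codegen_->MaybeGenerateReadBarrierSlow(
//       instruction, out_loc, /* ref= */ out_loc, base_loc, field_offset, Location::NoLocation());
//
// With slow-path read barriers this routes the loaded reference through artReadBarrierSlow;
// without read barriers it only unpoisons the output when heap poisoning is enabled.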
8126
8127 void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
8128 Location out,
8129 Location root) {
8130 DCHECK(EmitReadBarrier());
8131
8132 // Insert a slow path based read barrier *after* the GC root load.
8133 //
8134 // Note that GC roots are not affected by heap poisoning, so we do
8135 // not need to do anything special for this here.
8136 SlowPathCode* slow_path =
8137 new (GetScopedAllocator()) ReadBarrierForRootSlowPathX86_64(instruction, out, root);
8138 AddSlowPath(slow_path);
8139
8140 __ jmp(slow_path->GetEntryLabel());
8141 __ Bind(slow_path->GetExitLabel());
8142 }
8143
8144 void LocationsBuilderX86_64::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
8145 // Nothing to do, this should be removed during prepare for register allocator.
8146 LOG(FATAL) << "Unreachable";
8147 }
8148
8149 void InstructionCodeGeneratorX86_64::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
8150 // Nothing to do, this should be removed during prepare for register allocator.
8151 LOG(FATAL) << "Unreachable";
8152 }
8153
8154 // Packed switch: generate cascaded compare/jumps for a small number of entries, a jump table otherwise.
8155 void LocationsBuilderX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
8156 LocationSummary* locations =
8157 new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
8158 locations->SetInAt(0, Location::RequiresRegister());
8159 locations->AddTemp(Location::RequiresRegister());
8160 locations->AddTemp(Location::RequiresRegister());
8161 }
8162
8163 void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
8164 int32_t lower_bound = switch_instr->GetStartValue();
8165 uint32_t num_entries = switch_instr->GetNumEntries();
8166 LocationSummary* locations = switch_instr->GetLocations();
8167 CpuRegister value_reg_in = locations->InAt(0).AsRegister<CpuRegister>();
8168 CpuRegister temp_reg = locations->GetTemp(0).AsRegister<CpuRegister>();
8169 CpuRegister base_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
8170 HBasicBlock* default_block = switch_instr->GetDefaultBlock();
8171
8172 // Should we generate smaller inline compare/jumps?
8173 if (num_entries <= kPackedSwitchJumpTableThreshold) {
8174 // Figure out the correct compare values and jump conditions.
8175 // Handle the first compare/branch as a special case because it might
8176 // jump to the default case.
8177 DCHECK_GT(num_entries, 2u);
8178 Condition first_condition;
8179 uint32_t index;
8180 const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
8181 if (lower_bound != 0) {
8182 first_condition = kLess;
8183 __ cmpl(value_reg_in, Immediate(lower_bound));
8184 __ j(first_condition, codegen_->GetLabelOf(default_block));
8185 __ j(kEqual, codegen_->GetLabelOf(successors[0]));
8186
8187 index = 1;
8188 } else {
8189 // Handle all the compare/jumps below.
8190 first_condition = kBelow;
8191 index = 0;
8192 }
8193
8194 // Handle the rest of the compare/jumps.
8195 for (; index + 1 < num_entries; index += 2) {
8196 int32_t compare_to_value = lower_bound + index + 1;
8197 __ cmpl(value_reg_in, Immediate(compare_to_value));
8198 // Jump to successors[index] if value < case_value[index].
8199 __ j(first_condition, codegen_->GetLabelOf(successors[index]));
8200 // Jump to successors[index + 1] if value == case_value[index + 1].
8201 __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
8202 }
8203
8204 if (index != num_entries) {
8205       // There is an odd number of entries. Handle the last one.
8206 DCHECK_EQ(index + 1, num_entries);
8207 __ cmpl(value_reg_in, Immediate(static_cast<int32_t>(lower_bound + index)));
8208 __ j(kEqual, codegen_->GetLabelOf(successors[index]));
8209 }
8210
8211 // And the default for any other value.
8212 if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
8213 __ jmp(codegen_->GetLabelOf(default_block));
8214 }
8215 return;
8216 }
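// Worked example of the cascaded sequence emitted above (illustrative): with lower_bound == 0
// and num_entries == 5 the generated code is roughly
//   cmpl value, 1 ; jb s[0] ; je s[1]   // value == 0 -> s[0], value == 1 -> s[1]
//   cmpl value, 3 ; jb s[2] ; je s[3]   // value == 2 -> s[2], value == 3 -> s[3]
//   cmpl value, 4 ; je s[4]             // odd last entry: value == 4 -> s[4]
//   jmp default                         // everything else, including negative values
// The unsigned kBelow condition is what lets negative inputs fall through to the default block.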
8217
8218 // Remove the bias, if needed.
8219 Register value_reg_out = value_reg_in.AsRegister();
8220 if (lower_bound != 0) {
8221 __ leal(temp_reg, Address(value_reg_in, -lower_bound));
8222 value_reg_out = temp_reg.AsRegister();
8223 }
8224 CpuRegister value_reg(value_reg_out);
8225
8226 // Is the value in range?
8227 __ cmpl(value_reg, Immediate(num_entries - 1));
8228 __ j(kAbove, codegen_->GetLabelOf(default_block));
8229
8230 // We are in the range of the table.
8231 // Load the address of the jump table in the constant area.
8232 __ leaq(base_reg, codegen_->LiteralCaseTable(switch_instr));
8233
8234 // Load the (signed) offset from the jump table.
8235 __ movsxd(temp_reg, Address(base_reg, value_reg, TIMES_4, 0));
8236
8237 // Add the offset to the address of the table base.
8238 __ addq(temp_reg, base_reg);
8239
8240 // And jump.
8241 __ jmp(temp_reg);
8242 }
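// Rough sketch of the jump-table dispatch emitted above (illustrative; actual registers are
// assigned by the register allocator):
//   leal   temp, [value_in - lower_bound]      // only when lower_bound != 0
//   cmpl   value, num_entries - 1
//   ja     default
//   leaq   base, [rip + <jump table in constant area>]
//   movsxd temp, dword ptr [base + value * 4]  // table entry: target offset relative to the table
//   addq   temp, base                          // absolute address of the target block
//   jmp    temp
// The table entries themselves are written out by JumpTableRIPFixup::CreateJumpTable below.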
8243
8244 void LocationsBuilderX86_64::VisitIntermediateAddress(
8245 [[maybe_unused]] HIntermediateAddress* instruction) {
8246 LOG(FATAL) << "Unreachable";
8247 }
8248
8249 void InstructionCodeGeneratorX86_64::VisitIntermediateAddress(
8250 [[maybe_unused]] HIntermediateAddress* instruction) {
8251 LOG(FATAL) << "Unreachable";
8252 }
8253
8254 void CodeGeneratorX86_64::Load32BitValue(CpuRegister dest, int32_t value) {
8255 if (value == 0) {
8256 __ xorl(dest, dest);
8257 } else {
8258 __ movl(dest, Immediate(value));
8259 }
8260 }
8261
8262 void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) {
8263 if (value == 0) {
8264 // Clears upper bits too.
8265 __ xorl(dest, dest);
8266 } else if (IsUint<32>(value)) {
8267     // We can use a 32-bit move, as it zero-extends and is shorter.
8268 __ movl(dest, Immediate(static_cast<int32_t>(value)));
8269 } else {
8270 __ movq(dest, Immediate(value));
8271 }
8272 }
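// Illustrative examples of the dispatch above:
//   value == 0                   -> xorl dest, dest       (also clears the upper 32 bits)
//   value == 0x00000000ffffffff  -> movl dest, 0xffffffff (movl zero-extends into the upper half)
//   value == -1                  -> movq dest, -1         (not representable as an unsigned 32-bit
//                                                          pattern, so the 64-bit form is needed)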
8273
8274 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, int32_t value) {
8275 if (value == 0) {
8276 __ xorps(dest, dest);
8277 } else {
8278 __ movss(dest, LiteralInt32Address(value));
8279 }
8280 }
8281
8282 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, int64_t value) {
8283 if (value == 0) {
8284 __ xorpd(dest, dest);
8285 } else {
8286 __ movsd(dest, LiteralInt64Address(value));
8287 }
8288 }
8289
8290 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, float value) {
8291 Load32BitValue(dest, bit_cast<int32_t, float>(value));
8292 }
8293
8294 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, double value) {
8295 Load64BitValue(dest, bit_cast<int64_t, double>(value));
8296 }
8297
8298 void CodeGeneratorX86_64::Compare32BitValue(CpuRegister dest, int32_t value) {
8299 if (value == 0) {
8300 __ testl(dest, dest);
8301 } else {
8302 __ cmpl(dest, Immediate(value));
8303 }
8304 }
8305
8306 void CodeGeneratorX86_64::Compare64BitValue(CpuRegister dest, int64_t value) {
8307 if (IsInt<32>(value)) {
8308 if (value == 0) {
8309 __ testq(dest, dest);
8310 } else {
8311 __ cmpq(dest, Immediate(static_cast<int32_t>(value)));
8312 }
8313 } else {
8314     // Value won't fit in a sign-extended 32-bit immediate.
8315 __ cmpq(dest, LiteralInt64Address(value));
8316 }
8317 }
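// Note on the asymmetry with Load64BitValue above: cmpq sign-extends its 32-bit immediate, so only
// values representable as a signed 32-bit integer (IsInt<32>) can be encoded inline, whereas movl
// zero-extends its destination, which is why the loader checks IsUint<32>. Anything larger is
// compared against a literal placed in the constant area via LiteralInt64Address.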
8318
8319 void CodeGeneratorX86_64::GenerateIntCompare(Location lhs, Location rhs) {
8320 CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>();
8321 GenerateIntCompare(lhs_reg, rhs);
8322 }
8323
8324 void CodeGeneratorX86_64::GenerateIntCompare(CpuRegister lhs, Location rhs) {
8325 if (rhs.IsConstant()) {
8326 int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
8327 Compare32BitValue(lhs, value);
8328 } else if (rhs.IsStackSlot()) {
8329 __ cmpl(lhs, Address(CpuRegister(RSP), rhs.GetStackIndex()));
8330 } else {
8331 __ cmpl(lhs, rhs.AsRegister<CpuRegister>());
8332 }
8333 }
8334
8335 void CodeGeneratorX86_64::GenerateLongCompare(Location lhs, Location rhs) {
8336 CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>();
8337 if (rhs.IsConstant()) {
8338 int64_t value = rhs.GetConstant()->AsLongConstant()->GetValue();
8339 Compare64BitValue(lhs_reg, value);
8340 } else if (rhs.IsDoubleStackSlot()) {
8341 __ cmpq(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
8342 } else {
8343 __ cmpq(lhs_reg, rhs.AsRegister<CpuRegister>());
8344 }
8345 }
8346
8347 Address CodeGeneratorX86_64::ArrayAddress(CpuRegister obj,
8348 Location index,
8349 ScaleFactor scale,
8350 uint32_t data_offset) {
8351 return index.IsConstant()
8352 ? Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset)
8353 : Address(obj, index.AsRegister<CpuRegister>(), scale, data_offset);
8354 }
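// Worked example (hypothetical numbers): for a constant index of 3 with scale TIMES_4 and a
// data_offset of 12, the first branch yields Address(obj, (3 << 2) + 12) == Address(obj, 24);
// with a register index the same access becomes Address(obj, index_reg, TIMES_4, 12).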
8355
8356 void CodeGeneratorX86_64::Store64BitValueToStack(Location dest, int64_t value) {
8357 DCHECK(dest.IsDoubleStackSlot());
8358 if (IsInt<32>(value)) {
8359 // Can move directly as an int32 constant.
8360 __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()),
8361 Immediate(static_cast<int32_t>(value)));
8362 } else {
8363 Load64BitValue(CpuRegister(TMP), value);
8364 __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()), CpuRegister(TMP));
8365 }
8366 }
8367
8368 /**
8369  * Class to handle late fixup of offsets into the constant area.
8370 */
8371 class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
8372 public:
8373   RIPFixup(CodeGeneratorX86_64& codegen, size_t offset)
8374 : codegen_(&codegen), offset_into_constant_area_(offset) {}
8375
8376 protected:
8377   void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }
8378
8379 CodeGeneratorX86_64* codegen_;
8380
8381 private:
8382   void Process(const MemoryRegion& region, int pos) override {
8383 // Patch the correct offset for the instruction. We use the address of the
8384 // 'next' instruction, which is 'pos' (patch the 4 bytes before).
8385 int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
8386 int32_t relative_position = constant_offset - pos;
8387
8388 // Patch in the right value.
8389 region.StoreUnaligned<int32_t>(pos - 4, relative_position);
8390 }
8391
8392 // Location in constant area that the fixup refers to.
8393 size_t offset_into_constant_area_;
8394 };
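// Worked example of RIPFixup::Process (made-up numbers): if the constant area starts at code
// offset 0x200 and the literal sits 8 bytes into it, constant_offset is 0x208. For a fixup whose
// `pos` (the end of the referencing instruction) is 0x50, relative_position = 0x208 - 0x50 = 0x1b8,
// and that value is written into the 4 displacement bytes at 0x4c..0x4f. Since RIP-relative
// addressing is relative to the next instruction, the displacement is exactly target - pos.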
8395
8396 /**
8397  * Class to handle late fixup of offsets to a jump table that will be created in the
8398 * constant area.
8399 */
8400 class JumpTableRIPFixup : public RIPFixup {
8401 public:
8402   JumpTableRIPFixup(CodeGeneratorX86_64& codegen, HPackedSwitch* switch_instr)
8403 : RIPFixup(codegen, -1), switch_instr_(switch_instr) {}
8404
8405   void CreateJumpTable() {
8406 X86_64Assembler* assembler = codegen_->GetAssembler();
8407
8408 // Ensure that the reference to the jump table has the correct offset.
8409 const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
8410 SetOffset(offset_in_constant_table);
8411
8412 // Compute the offset from the start of the function to this jump table.
8413 const int32_t current_table_offset = assembler->CodeSize() + offset_in_constant_table;
8414
8415     // Populate the jump table with the target offsets for each case.
8416 int32_t num_entries = switch_instr_->GetNumEntries();
8417 HBasicBlock* block = switch_instr_->GetBlock();
8418 const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
8419     // The value that we want is the target offset minus the position of the table.
8420 for (int32_t i = 0; i < num_entries; i++) {
8421 HBasicBlock* b = successors[i];
8422 Label* l = codegen_->GetLabelOf(b);
8423 DCHECK(l->IsBound());
8424 int32_t offset_to_block = l->Position() - current_table_offset;
8425 assembler->AppendInt32(offset_to_block);
8426 }
8427 }
8428
8429 private:
8430 const HPackedSwitch* switch_instr_;
8431 };
8432
8433 void CodeGeneratorX86_64::Finalize() {
8434 // Generate the constant area if needed.
8435 X86_64Assembler* assembler = GetAssembler();
8436 if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
8437     // Align to a 4-byte boundary to reduce cache misses, as the data consists of 4- and 8-byte values.
8438 assembler->Align(4, 0);
8439 constant_area_start_ = assembler->CodeSize();
8440
8441 // Populate any jump tables.
8442 for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) {
8443 jump_table->CreateJumpTable();
8444 }
8445
8446 // And now add the constant area to the generated code.
8447 assembler->AddConstantArea();
8448 }
8449
8450 // And finish up.
8451 CodeGenerator::Finalize();
8452 }
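// Resulting layout after Finalize (sketch): [ generated code | padding to a 4-byte boundary |
// constant area ], where the constant area holds the literals added via the AddDouble/AddFloat/
// AddInt32/AddInt64 fixups below plus any jump tables appended by CreateJumpTable, and
// constant_area_start_ records the offset at which it begins.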
8453
8454 Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) {
8455 AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddDouble(v));
8456 return Address::RIP(fixup);
8457 }
8458
8459 Address CodeGeneratorX86_64::LiteralFloatAddress(float v) {
8460 AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddFloat(v));
8461 return Address::RIP(fixup);
8462 }
8463
8464 Address CodeGeneratorX86_64::LiteralInt32Address(int32_t v) {
8465 AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddInt32(v));
8466 return Address::RIP(fixup);
8467 }
8468
8469 Address CodeGeneratorX86_64::LiteralInt64Address(int64_t v) {
8470 AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddInt64(v));
8471 return Address::RIP(fixup);
8472 }
8473
8474 // TODO: trg as memory.
8475 void CodeGeneratorX86_64::MoveFromReturnRegister(Location trg, DataType::Type type) {
8476 if (!trg.IsValid()) {
8477 DCHECK_EQ(type, DataType::Type::kVoid);
8478 return;
8479 }
8480
8481 DCHECK_NE(type, DataType::Type::kVoid);
8482
8483 Location return_loc = InvokeDexCallingConventionVisitorX86_64().GetReturnLocation(type);
8484 if (trg.Equals(return_loc)) {
8485 return;
8486 }
8487
8488 // Let the parallel move resolver take care of all of this.
8489 HParallelMove parallel_move(GetGraph()->GetAllocator());
8490 parallel_move.AddMove(return_loc, trg, type, nullptr);
8491   GetMoveResolver()->EmitNativeCode(&parallel_move);
8492 }
8493
8494 Address CodeGeneratorX86_64::LiteralCaseTable(HPackedSwitch* switch_instr) {
8495   // Create a fixup that will be used both to create the jump table and to address it.
8496 JumpTableRIPFixup* table_fixup =
8497 new (GetGraph()->GetAllocator()) JumpTableRIPFixup(*this, switch_instr);
8498
8499   // The jump table itself is populated later, in Finalize().
8500 fixups_to_jump_tables_.push_back(table_fixup);
8501 return Address::RIP(table_fixup);
8502 }
8503
8504 void CodeGeneratorX86_64::MoveInt64ToAddress(const Address& addr_low,
8505 const Address& addr_high,
8506 int64_t v,
8507 HInstruction* instruction) {
8508 if (IsInt<32>(v)) {
8509 int32_t v_32 = v;
8510 __ movq(addr_low, Immediate(v_32));
8511 MaybeRecordImplicitNullCheck(instruction);
8512 } else {
8513     // The value doesn't fit in a sign-extended 32-bit immediate. Store it in two 32-bit pieces.
8514 int32_t low_v = Low32Bits(v);
8515 int32_t high_v = High32Bits(v);
8516 __ movl(addr_low, Immediate(low_v));
8517 MaybeRecordImplicitNullCheck(instruction);
8518 __ movl(addr_high, Immediate(high_v));
8519 }
8520 }
8521
8522 void CodeGeneratorX86_64::PatchJitRootUse(uint8_t* code,
8523 const uint8_t* roots_data,
8524 const PatchInfo<Label>& info,
8525 uint64_t index_in_table) const {
8526 uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
8527 uintptr_t address =
8528 reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
8529 using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t;
8530 reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
8531 dchecked_integral_cast<uint32_t>(address);
8532 }
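// Sketch of the patch above (illustrative): the JIT root load was emitted with a placeholder
// 32-bit literal and a Label bound near the end of that instruction; the adjustment
// kLabelPositionToLiteralOffsetAdjustment backs up from the label to the 4 literal bytes, which
// are then overwritten with the address of entry `index_in_table` in the JIT root table at
// `roots_data`, which is why the address is dchecked to fit in 32 bits.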
8533
8534 void CodeGeneratorX86_64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
8535 for (const PatchInfo<Label>& info : jit_string_patches_) {
8536 StringReference string_reference(info.target_dex_file, dex::StringIndex(info.offset_or_index));
8537 uint64_t index_in_table = GetJitStringRootIndex(string_reference);
8538 PatchJitRootUse(code, roots_data, info, index_in_table);
8539 }
8540
8541 for (const PatchInfo<Label>& info : jit_class_patches_) {
8542 TypeReference type_reference(info.target_dex_file, dex::TypeIndex(info.offset_or_index));
8543 uint64_t index_in_table = GetJitClassRootIndex(type_reference);
8544 PatchJitRootUse(code, roots_data, info, index_in_table);
8545 }
8546 }
8547
8548 bool LocationsBuilderX86_64::CpuHasAvxFeatureFlag() {
8549 return codegen_->GetInstructionSetFeatures().HasAVX();
8550 }
8551
8552 bool LocationsBuilderX86_64::CpuHasAvx2FeatureFlag() {
8553 return codegen_->GetInstructionSetFeatures().HasAVX2();
8554 }
8555
8556 bool InstructionCodeGeneratorX86_64::CpuHasAvxFeatureFlag() {
8557 return codegen_->GetInstructionSetFeatures().HasAVX();
8558 }
8559
8560 bool InstructionCodeGeneratorX86_64::CpuHasAvx2FeatureFlag() {
8561 return codegen_->GetInstructionSetFeatures().HasAVX2();
8562 }
8563
8564 void LocationsBuilderX86_64::VisitBitwiseNegatedRight(
8565 [[maybe_unused]] HBitwiseNegatedRight* instruction) {
8566 LOG(FATAL) << "Unimplemented";
8567 }
8568
8569 void InstructionCodeGeneratorX86_64::VisitBitwiseNegatedRight(
8570 [[maybe_unused]] HBitwiseNegatedRight* instruction) {
8571 LOG(FATAL) << "Unimplemented";
8572 }
8573
8574 #undef __
8575
8576 } // namespace x86_64
8577 } // namespace art
8578