/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "code_generator_x86_64.h"

#include "art_method.h"
#include "class_table.h"
#include "code_generator_utils.h"
#include "compiled_method.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "gc/accounting/card_table.h"
#include "gc/space/image_space.h"
#include "heap_poisoning.h"
#include "intrinsics.h"
#include "intrinsics_x86_64.h"
#include "linker/linker_patch.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
#include "mirror/object_reference.h"
#include "thread.h"
#include "utils/assembler.h"
#include "utils/stack_checks.h"
#include "utils/x86_64/assembler_x86_64.h"
#include "utils/x86_64/managed_register_x86_64.h"

namespace art {

template<class MirrorType>
class GcRoot;

namespace x86_64 {

static constexpr int kCurrentMethodStackOffset = 0;
static constexpr Register kMethodRegisterArgument = RDI;
// The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump
// table version generates 7 instructions and num_entries literals. The compare/jump sequence
// generates less code/data with a small num_entries.
static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;

static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 };
static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 };

static constexpr int kC2ConditionMask = 0x400;

static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
  // Custom calling convention: RAX serves as both input and output.
  RegisterSet caller_saves = RegisterSet::Empty();
  caller_saves.Add(Location::RegisterLocation(RAX));
  return caller_saves;
}

// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())->  // NOLINT
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize, x).Int32Value()

class NullCheckSlowPathX86_64 : public SlowPathCode {
 public:
  explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    if (instruction_->CanThrowIntoCatchBlock()) {
      // Live registers will be restored in the catch block if caught.
      SaveLiveRegisters(codegen, instruction_->GetLocations());
    }
    x86_64_codegen->InvokeRuntime(kQuickThrowNullPointer,
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
    CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
  }

  bool IsFatal() const override { return true; }

  const char* GetDescription() const override { return "NullCheckSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86_64);
};

class DivZeroCheckSlowPathX86_64 : public SlowPathCode {
 public:
  explicit DivZeroCheckSlowPathX86_64(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    x86_64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
  }

  bool IsFatal() const override { return true; }

  const char* GetDescription() const override { return "DivZeroCheckSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86_64);
};

class DivRemMinusOneSlowPathX86_64 : public SlowPathCode {
 public:
  DivRemMinusOneSlowPathX86_64(HInstruction* at, Register reg, DataType::Type type, bool is_div)
      : SlowPathCode(at), cpu_reg_(CpuRegister(reg)), type_(type), is_div_(is_div) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    __ Bind(GetEntryLabel());
    if (type_ == DataType::Type::kInt32) {
      if (is_div_) {
        __ negl(cpu_reg_);
      } else {
        __ xorl(cpu_reg_, cpu_reg_);
      }
    } else {
      DCHECK_EQ(DataType::Type::kInt64, type_);
      if (is_div_) {
        __ negq(cpu_reg_);
      } else {
        __ xorl(cpu_reg_, cpu_reg_);
      }
    }
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "DivRemMinusOneSlowPathX86_64"; }

 private:
  const CpuRegister cpu_reg_;
  const DataType::Type type_;
  const bool is_div_;
  DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86_64);
};

class SuspendCheckSlowPathX86_64 : public SlowPathCode {
 public:
  SuspendCheckSlowPathX86_64(HSuspendCheck* instruction, HBasicBlock* successor)
      : SlowPathCode(instruction), successor_(successor) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);  // Only saves full width XMM for SIMD.
    x86_64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickTestSuspend, void, void>();
    RestoreLiveRegisters(codegen, locations);  // Only restores full width XMM for SIMD.
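    // If the suspend check has no explicit successor, resume right after the check at the
    // return label; otherwise jump straight to the successor block.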
    if (successor_ == nullptr) {
      __ jmp(GetReturnLabel());
    } else {
      __ jmp(x86_64_codegen->GetLabelOf(successor_));
    }
  }

  Label* GetReturnLabel() {
    DCHECK(successor_ == nullptr);
    return &return_label_;
  }

  HBasicBlock* GetSuccessor() const {
    return successor_;
  }

  const char* GetDescription() const override { return "SuspendCheckSlowPathX86_64"; }

 private:
  HBasicBlock* const successor_;
  Label return_label_;

  DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86_64);
};

class BoundsCheckSlowPathX86_64 : public SlowPathCode {
 public:
  explicit BoundsCheckSlowPathX86_64(HBoundsCheck* instruction)
      : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    if (instruction_->CanThrowIntoCatchBlock()) {
      // Live registers will be restored in the catch block if caught.
      SaveLiveRegisters(codegen, instruction_->GetLocations());
    }
    // Are we using an array length from memory?
    HInstruction* array_length = instruction_->InputAt(1);
    Location length_loc = locations->InAt(1);
    InvokeRuntimeCallingConvention calling_convention;
    if (array_length->IsArrayLength() && array_length->IsEmittedAtUseSite()) {
      // Load the array length into our temporary.
      HArrayLength* length = array_length->AsArrayLength();
      uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(length);
      Location array_loc = array_length->GetLocations()->InAt(0);
      Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
      length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
      // Check for conflicts with index.
      if (length_loc.Equals(locations->InAt(0))) {
        // We know we aren't using parameter 2.
        length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2));
      }
      __ movl(length_loc.AsRegister<CpuRegister>(), array_len);
      if (mirror::kUseStringCompression && length->IsStringLength()) {
        __ shrl(length_loc.AsRegister<CpuRegister>(), Immediate(1));
      }
    }

    // We're moving two locations to locations that could overlap, so we need a parallel
    // move resolver.
    codegen->EmitParallelMoves(
        locations->InAt(0),
        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
        DataType::Type::kInt32,
        length_loc,
        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
        DataType::Type::kInt32);
    QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
        ? kQuickThrowStringBounds
        : kQuickThrowArrayBounds;
    x86_64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
    CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
  }

  bool IsFatal() const override { return true; }

  const char* GetDescription() const override { return "BoundsCheckSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86_64);
};

class LoadClassSlowPathX86_64 : public SlowPathCode {
 public:
  LoadClassSlowPathX86_64(HLoadClass* cls, HInstruction* at)
      : SlowPathCode(at), cls_(cls) {
    DCHECK(at->IsLoadClass() || at->IsClinitCheck());
    DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    Location out = locations->Out();
    const uint32_t dex_pc = instruction_->GetDexPc();
    bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
    bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();

    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    // Custom calling convention: RAX serves as both input and output.
    if (must_resolve_type) {
      DCHECK(IsSameDexFile(cls_->GetDexFile(), x86_64_codegen->GetGraph()->GetDexFile()));
      dex::TypeIndex type_index = cls_->GetTypeIndex();
      __ movl(CpuRegister(RAX), Immediate(type_index.index_));
      x86_64_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
      CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
      // If we also must_do_clinit, the resolved type is now in the correct register.
    } else {
      DCHECK(must_do_clinit);
      Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
      x86_64_codegen->Move(Location::RegisterLocation(RAX), source);
    }
    if (must_do_clinit) {
      x86_64_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
      CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
    }

    // Move the class to the desired location.
    if (out.IsValid()) {
      DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      x86_64_codegen->Move(out, Location::RegisterLocation(RAX));
    }

    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "LoadClassSlowPathX86_64"; }

 private:
  // The class this slow path will load.
  HLoadClass* const cls_;

  DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86_64);
};

class LoadStringSlowPathX86_64 : public SlowPathCode {
 public:
  explicit LoadStringSlowPathX86_64(HLoadString* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));

    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
    // Custom calling convention: RAX serves as both input and output.
    __ movl(CpuRegister(RAX), Immediate(string_index.index_));
    x86_64_codegen->InvokeRuntime(kQuickResolveString,
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
    CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
    x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
    RestoreLiveRegisters(codegen, locations);

    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "LoadStringSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86_64);
};

class TypeCheckSlowPathX86_64 : public SlowPathCode {
 public:
  TypeCheckSlowPathX86_64(HInstruction* instruction, bool is_fatal)
      : SlowPathCode(instruction), is_fatal_(is_fatal) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    uint32_t dex_pc = instruction_->GetDexPc();
    DCHECK(instruction_->IsCheckCast()
           || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));

    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());

    if (kPoisonHeapReferences &&
        instruction_->IsCheckCast() &&
        instruction_->AsCheckCast()->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) {
      // First, unpoison the `cls` reference that was poisoned for direct memory comparison.
      __ UnpoisonHeapReference(locations->InAt(1).AsRegister<CpuRegister>());
    }

    if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
      SaveLiveRegisters(codegen, locations);
    }

    // We're moving two locations to locations that could overlap, so we need a parallel
    // move resolver.
    InvokeRuntimeCallingConvention calling_convention;
    codegen->EmitParallelMoves(locations->InAt(0),
                               Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
                               DataType::Type::kReference,
                               locations->InAt(1),
                               Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
                               DataType::Type::kReference);
    if (instruction_->IsInstanceOf()) {
      x86_64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
      CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
    } else {
      DCHECK(instruction_->IsCheckCast());
      x86_64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
      CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
    }

    if (!is_fatal_) {
      if (instruction_->IsInstanceOf()) {
        x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
      }

      RestoreLiveRegisters(codegen, locations);
      __ jmp(GetExitLabel());
    }
  }

  const char* GetDescription() const override { return "TypeCheckSlowPathX86_64"; }

  bool IsFatal() const override { return is_fatal_; }

 private:
  const bool is_fatal_;

  DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86_64);
};

class DeoptimizationSlowPathX86_64 : public SlowPathCode {
 public:
  explicit DeoptimizationSlowPathX86_64(HDeoptimize* instruction)
      : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    LocationSummary* locations = instruction_->GetLocations();
    SaveLiveRegisters(codegen, locations);
    InvokeRuntimeCallingConvention calling_convention;
    x86_64_codegen->Load32BitValue(
        CpuRegister(calling_convention.GetRegisterAt(0)),
        static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
    x86_64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
  }

  const char* GetDescription() const override { return "DeoptimizationSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64);
};

class ArraySetSlowPathX86_64 : public SlowPathCode {
 public:
  explicit ArraySetSlowPathX86_64(HInstruction* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
    parallel_move.AddMove(
        locations->InAt(0),
        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
        DataType::Type::kReference,
        nullptr);
    parallel_move.AddMove(
        locations->InAt(1),
        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
        DataType::Type::kInt32,
        nullptr);
    parallel_move.AddMove(
        locations->InAt(2),
        Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
        DataType::Type::kReference,
        nullptr);
    codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);

    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    x86_64_codegen->InvokeRuntime(kQuickAputObject,
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
    CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "ArraySetSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64);
};

// Slow path marking an object reference `ref` during a read
// barrier. The field `obj.field` in the object `obj` holding this
// reference does not get updated by this slow path after marking (see
// ReadBarrierMarkAndUpdateFieldSlowPathX86_64 below for that).
//
// This means that after the execution of this slow path, `ref` will
// always be up-to-date, but `obj.field` may not; i.e., after the
// flip, `ref` will be a to-space reference, but `obj.field` will
// probably still be a from-space reference (unless it gets updated by
// another thread, or if another thread installed another object
// reference (different from `ref`) in `obj.field`).
class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
 public:
  ReadBarrierMarkSlowPathX86_64(HInstruction* instruction,
                                Location ref,
                                bool unpoison_ref_before_marking)
      : SlowPathCode(instruction),
        ref_(ref),
        unpoison_ref_before_marking_(unpoison_ref_before_marking) {
    DCHECK(kEmitCompilerReadBarrier);
  }

  const char* GetDescription() const override { return "ReadBarrierMarkSlowPathX86_64"; }

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
    Register ref_reg = ref_cpu_reg.AsRegister();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
    DCHECK(instruction_->IsInstanceFieldGet() ||
           instruction_->IsStaticFieldGet() ||
           instruction_->IsArrayGet() ||
           instruction_->IsArraySet() ||
           instruction_->IsLoadClass() ||
           instruction_->IsLoadString() ||
           instruction_->IsInstanceOf() ||
           instruction_->IsCheckCast() ||
           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
           (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier marking slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    if (unpoison_ref_before_marking_) {
      // Object* ref = ref_addr->AsMirrorPtr()
      __ MaybeUnpoisonHeapReference(ref_cpu_reg);
    }
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    DCHECK_NE(ref_reg, RSP);
    DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
    // "Compact" slow path, saving two moves.
    //
    // Instead of using the standard runtime calling convention (input
    // in RDI and output in RAX):
    //
    //   RDI <- ref
    //   RAX <- ReadBarrierMark(RDI)
    //   ref <- RAX
    //
    // we just use rX (the register containing `ref`) as input and output
    // of a dedicated entrypoint:
    //
    //   rX <- ReadBarrierMarkRegX(rX)
    //
    int32_t entry_point_offset =
        Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
    // This runtime call does not require a stack map.
    x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    __ jmp(GetExitLabel());
  }

 private:
  // The location (register) of the marked object reference.
  const Location ref_;
  // Should the reference in `ref_` be unpoisoned prior to marking it?
  const bool unpoison_ref_before_marking_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64);
};

// Slow path marking an object reference `ref` during a read barrier,
// and if needed, atomically updating the field `obj.field` in the
// object `obj` holding this reference after marking (contrary to
// ReadBarrierMarkSlowPathX86_64 above, which never tries to update
// `obj.field`).
//
// This means that after the execution of this slow path, both `ref`
// and `obj.field` will be up-to-date; i.e., after the flip, both will
// hold the same to-space reference (unless another thread installed
// another object reference (different from `ref`) in `obj.field`).
class ReadBarrierMarkAndUpdateFieldSlowPathX86_64 : public SlowPathCode {
 public:
  ReadBarrierMarkAndUpdateFieldSlowPathX86_64(HInstruction* instruction,
                                              Location ref,
                                              CpuRegister obj,
                                              const Address& field_addr,
                                              bool unpoison_ref_before_marking,
                                              CpuRegister temp1,
                                              CpuRegister temp2)
      : SlowPathCode(instruction),
        ref_(ref),
        obj_(obj),
        field_addr_(field_addr),
        unpoison_ref_before_marking_(unpoison_ref_before_marking),
        temp1_(temp1),
        temp2_(temp2) {
    DCHECK(kEmitCompilerReadBarrier);
  }

  const char* GetDescription() const override {
    return "ReadBarrierMarkAndUpdateFieldSlowPathX86_64";
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
    Register ref_reg = ref_cpu_reg.AsRegister();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
    // This slow path is only used by the UnsafeCASObject intrinsic.
    DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier marking and field updating slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);

    __ Bind(GetEntryLabel());
    if (unpoison_ref_before_marking_) {
      // Object* ref = ref_addr->AsMirrorPtr()
      __ MaybeUnpoisonHeapReference(ref_cpu_reg);
    }

    // Save the old (unpoisoned) reference.
    __ movl(temp1_, ref_cpu_reg);

    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    DCHECK_NE(ref_reg, RSP);
    DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
    // "Compact" slow path, saving two moves.
    //
    // Instead of using the standard runtime calling convention (input
    // in RDI and output in RAX):
    //
    //   RDI <- ref
    //   RAX <- ReadBarrierMark(RDI)
    //   ref <- RAX
    //
    // we just use rX (the register containing `ref`) as input and output
    // of a dedicated entrypoint:
    //
    //   rX <- ReadBarrierMarkRegX(rX)
    //
    int32_t entry_point_offset =
        Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
    // This runtime call does not require a stack map.
    x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);

    // If the new reference is different from the old reference,
    // update the field in the holder (`*field_addr`).
    //
    // Note that this field could also hold a different object, if
    // another thread had concurrently changed it. In that case, the
    // LOCK CMPXCHGL instruction in the compare-and-set (CAS)
    // operation below would abort the CAS, leaving the field as-is.
    NearLabel done;
    __ cmpl(temp1_, ref_cpu_reg);
    __ j(kEqual, &done);

    // Update the holder's field atomically. This may fail if the
    // mutator updates it before us, but that is OK. This is achieved
    // using a strong compare-and-set (CAS) operation with relaxed
    // memory synchronization ordering, where the expected value is
    // the old reference and the desired value is the new reference.
    // This operation is implemented with a 32-bit LOCK CMPXCHG
    // instruction, which requires the expected value (the old
    // reference) to be in EAX. Save RAX beforehand, and move the
    // expected value (stored in `temp1_`) into EAX.
    __ movq(temp2_, CpuRegister(RAX));
    __ movl(CpuRegister(RAX), temp1_);

    // Convenience aliases.
    CpuRegister base = obj_;
    CpuRegister expected = CpuRegister(RAX);
    CpuRegister value = ref_cpu_reg;

    bool base_equals_value = (base.AsRegister() == value.AsRegister());
    Register value_reg = ref_reg;
    if (kPoisonHeapReferences) {
      if (base_equals_value) {
        // If `base` and `value` are the same register location, move
        // `value_reg` to a temporary register. This way, poisoning
        // `value_reg` won't invalidate `base`.
        value_reg = temp1_.AsRegister();
        __ movl(CpuRegister(value_reg), base);
      }

      // Check that the register allocator did not assign the location
      // of `expected` (RAX) to `value` nor to `base`, so that heap
      // poisoning (when enabled) works as intended below.
      // - If `value` were equal to `expected`, both references would
      //   be poisoned twice, meaning they would not be poisoned at
      //   all, as heap poisoning uses address negation.
      // - If `base` were equal to `expected`, poisoning `expected`
      //   would invalidate `base`.
      DCHECK_NE(value_reg, expected.AsRegister());
      DCHECK_NE(base.AsRegister(), expected.AsRegister());

      __ PoisonHeapReference(expected);
      __ PoisonHeapReference(CpuRegister(value_reg));
    }

    __ LockCmpxchgl(field_addr_, CpuRegister(value_reg));

    // If heap poisoning is enabled, we need to unpoison the values
    // that were poisoned earlier.
    if (kPoisonHeapReferences) {
      if (base_equals_value) {
        // `value_reg` has been moved to a temporary register, no need
        // to unpoison it.
      } else {
        __ UnpoisonHeapReference(CpuRegister(value_reg));
      }
      // No need to unpoison `expected` (RAX), as it will be overwritten below.
    }

    // Restore RAX.
    __ movq(CpuRegister(RAX), temp2_);

    __ Bind(&done);
    __ jmp(GetExitLabel());
  }

 private:
  // The location (register) of the marked object reference.
  const Location ref_;
  // The register containing the object holding the marked object reference field.
  const CpuRegister obj_;
  // The address of the marked reference field. The base of this address must be `obj_`.
  const Address field_addr_;

  // Should the reference in `ref_` be unpoisoned prior to marking it?
  const bool unpoison_ref_before_marking_;

  const CpuRegister temp1_;
  const CpuRegister temp2_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86_64);
};

// Slow path generating a read barrier for a heap reference.
class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
 public:
  ReadBarrierForHeapReferenceSlowPathX86_64(HInstruction* instruction,
                                            Location out,
                                            Location ref,
                                            Location obj,
                                            uint32_t offset,
                                            Location index)
      : SlowPathCode(instruction),
        out_(out),
        ref_(ref),
        obj_(obj),
        offset_(offset),
        index_(index) {
    DCHECK(kEmitCompilerReadBarrier);
    // If `obj` is equal to `out` or `ref`, it means the initial
    // object has been overwritten by (or after) the heap object
    // reference load to be instrumented, e.g.:
    //
    //   __ movl(out, Address(out, offset));
    //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
    //
    // In that case, we have lost the information about the original
    // object, and the emitted read barrier cannot work properly.
    DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
    DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    LocationSummary* locations = instruction_->GetLocations();
    CpuRegister reg_out = out_.AsRegister<CpuRegister>();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_;
    DCHECK(instruction_->IsInstanceFieldGet() ||
           instruction_->IsStaticFieldGet() ||
           instruction_->IsArrayGet() ||
           instruction_->IsInstanceOf() ||
           instruction_->IsCheckCast() ||
           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier for heap reference slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    // We may have to change the index's value, but as `index_` is a
    // constant member (like other "inputs" of this slow path),
    // introduce a copy of it, `index`.
    Location index = index_;
    if (index_.IsValid()) {
      // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
      if (instruction_->IsArrayGet()) {
        // Compute real offset and store it in index_.
        Register index_reg = index_.AsRegister<CpuRegister>().AsRegister();
        DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
        if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
          // We are about to change the value of `index_reg` (see the
          // calls to art::x86_64::X86_64Assembler::shll and
          // art::x86_64::X86_64Assembler::AddImmediate below), but it
          // has not been saved by the previous call to
          // art::SlowPathCode::SaveLiveRegisters, as it is a
          // callee-save register --
          // art::SlowPathCode::SaveLiveRegisters does not consider
          // callee-save registers, as it has been designed with the
          // assumption that callee-save registers are supposed to be
          // handled by the called function. So, as a callee-save
          // register, `index_reg` _would_ eventually be saved onto
          // the stack, but it would be too late: we would have
          // changed its value earlier. Therefore, we manually save
          // it here into another freely available register,
          // `free_reg`, chosen of course among the caller-save
          // registers (as a callee-save `free_reg` register would
          // exhibit the same problem).
          //
          // Note we could have requested a temporary register from
          // the register allocator instead; but we prefer not to, as
          // this is a slow path, and we know we can find a
          // caller-save register that is available.
          Register free_reg = FindAvailableCallerSaveRegister(codegen).AsRegister();
          __ movl(CpuRegister(free_reg), CpuRegister(index_reg));
          index_reg = free_reg;
          index = Location::RegisterLocation(index_reg);
        } else {
          // The initial register stored in `index_` has already been
          // saved in the call to art::SlowPathCode::SaveLiveRegisters
          // (as it is not a callee-save register), so we can freely
          // use it.
        }
        // Shifting the index value contained in `index_reg` by the
        // scale factor (2) cannot overflow in practice, as the
        // runtime is unable to allocate object arrays with a size
        // larger than 2^26 - 1 (that is, 2^28 - 4 bytes).
        __ shll(CpuRegister(index_reg), Immediate(TIMES_4));
        static_assert(
            sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
            "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
        __ AddImmediate(CpuRegister(index_reg), Immediate(offset_));
      } else {
        // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
        // intrinsics, `index_` is not shifted by a scale factor of 2
        // (as in the case of ArrayGet), as it is actually an offset
        // to an object field within an object.
        DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
        DCHECK(instruction_->GetLocations()->Intrinsified());
        DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
               (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
            << instruction_->AsInvoke()->GetIntrinsic();
        DCHECK_EQ(offset_, 0U);
        DCHECK(index_.IsRegister());
      }
    }

    // We're moving two or three locations to locations that could
    // overlap, so we need a parallel move resolver.
    InvokeRuntimeCallingConvention calling_convention;
    HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
    parallel_move.AddMove(ref_,
                          Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
                          DataType::Type::kReference,
                          nullptr);
    parallel_move.AddMove(obj_,
                          Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
                          DataType::Type::kReference,
                          nullptr);
    if (index.IsValid()) {
      parallel_move.AddMove(index,
                            Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
                            DataType::Type::kInt32,
                            nullptr);
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
    } else {
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
      __ movl(CpuRegister(calling_convention.GetRegisterAt(2)), Immediate(offset_));
    }
    x86_64_codegen->InvokeRuntime(kQuickReadBarrierSlow,
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
    CheckEntrypointTypes<
        kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
    x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));

    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override {
    return "ReadBarrierForHeapReferenceSlowPathX86_64";
  }

 private:
  CpuRegister FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
    size_t ref = static_cast<int>(ref_.AsRegister<CpuRegister>().AsRegister());
    size_t obj = static_cast<int>(obj_.AsRegister<CpuRegister>().AsRegister());
    for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
      if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
        return static_cast<CpuRegister>(i);
      }
    }
    // We shall never fail to find a free caller-save register, as
    // there are more than two core caller-save registers on x86-64
    // (meaning it is possible to find one which is different from
    // `ref` and `obj`).
    DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
    LOG(FATAL) << "Could not find a free caller-save register";
    UNREACHABLE();
  }

  const Location out_;
  const Location ref_;
  const Location obj_;
  const uint32_t offset_;
  // An additional location containing an index to an array.
  // Only used for HArrayGet and the UnsafeGetObject &
  // UnsafeGetObjectVolatile intrinsics.
  const Location index_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86_64);
};

// Slow path generating a read barrier for a GC root.
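// Such roots are produced by HLoadClass and HLoadString; the root is passed to the
// runtime as a GcRoot<mirror::Object>* rather than as a heap reference field.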
class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode {
 public:
  ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root)
      : SlowPathCode(instruction), out_(out), root_(root) {
    DCHECK(kEmitCompilerReadBarrier);
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
        << "Unexpected instruction in read barrier for GC root slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
    x86_64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
    CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
    x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));

    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "ReadBarrierForRootSlowPathX86_64"; }

 private:
  const Location out_;
  const Location root_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86_64);
};

#undef __
// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86_64Assembler*>(GetAssembler())->  // NOLINT

inline Condition X86_64IntegerCondition(IfCondition cond) {
  switch (cond) {
    case kCondEQ: return kEqual;
    case kCondNE: return kNotEqual;
    case kCondLT: return kLess;
    case kCondLE: return kLessEqual;
    case kCondGT: return kGreater;
    case kCondGE: return kGreaterEqual;
    case kCondB: return kBelow;
    case kCondBE: return kBelowEqual;
    case kCondA: return kAbove;
    case kCondAE: return kAboveEqual;
  }
  LOG(FATAL) << "Unreachable";
  UNREACHABLE();
}

// Maps FP condition to x86_64 name.
inline Condition X86_64FPCondition(IfCondition cond) {
  switch (cond) {
    case kCondEQ: return kEqual;
    case kCondNE: return kNotEqual;
    case kCondLT: return kBelow;
    case kCondLE: return kBelowEqual;
    case kCondGT: return kAbove;
    case kCondGE: return kAboveEqual;
    default: break;  // should not happen
  }
  LOG(FATAL) << "Unreachable";
  UNREACHABLE();
}

HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStaticOrDirectDispatch(
    const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
    ArtMethod* method ATTRIBUTE_UNUSED) {
  return desired_dispatch_info;
}

void CodeGeneratorX86_64::GenerateStaticOrDirectCall(
    HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
  // All registers are assumed to be correctly set up.

  Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
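  // Set up `callee_method` according to the method load kind; kRuntimeCall hands the
  // whole call off to the runtime instead.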
  switch (invoke->GetMethodLoadKind()) {
    case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
      // temp = thread->string_init_entrypoint
      uint32_t offset =
          GetThreadOffset<kX86_64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
      __ gs()->movq(temp.AsRegister<CpuRegister>(), Address::Absolute(offset, /* no_rip= */ true));
      break;
    }
    case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
      callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
      break;
    case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative:
      DCHECK(GetCompilerOptions().IsBootImage());
      __ leal(temp.AsRegister<CpuRegister>(),
              Address::Absolute(kDummy32BitOffset, /* no_rip= */ false));
      RecordBootImageMethodPatch(invoke);
      break;
    case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: {
      // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load.
      __ movl(temp.AsRegister<CpuRegister>(),
              Address::Absolute(kDummy32BitOffset, /* no_rip= */ false));
      RecordBootImageRelRoPatch(GetBootImageOffset(invoke));
      break;
    }
    case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
      __ movq(temp.AsRegister<CpuRegister>(),
              Address::Absolute(kDummy32BitOffset, /* no_rip= */ false));
      RecordMethodBssEntryPatch(invoke);
      break;
    }
    case HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress:
      Load64BitValue(temp.AsRegister<CpuRegister>(), invoke->GetMethodAddress());
      break;
    case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: {
      GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
      return;  // No code pointer retrieval; the runtime performs the call directly.
    }
  }

  switch (invoke->GetCodePtrLocation()) {
    case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
      __ call(&frame_entry_label_);
      break;
    case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
      // (callee_method + offset_of_quick_compiled_code)()
      __ call(Address(callee_method.AsRegister<CpuRegister>(),
                      ArtMethod::EntryPointFromQuickCompiledCodeOffset(
                          kX86_64PointerSize).SizeValue()));
      break;
  }
  RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);

  DCHECK(!IsLeafMethod());
}

void CodeGeneratorX86_64::GenerateVirtualCall(
    HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
  CpuRegister temp = temp_in.AsRegister<CpuRegister>();
  size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
      invoke->GetVTableIndex(), kX86_64PointerSize).SizeValue();

  // Use the calling convention instead of the location of the receiver, as
  // intrinsics may have put the receiver in a different register. In the intrinsics
  // slow path, the arguments have been moved to the right place, so here we are
  // guaranteed that the receiver is the first register of the calling convention.
  InvokeDexCallingConvention calling_convention;
  Register receiver = calling_convention.GetRegisterAt(0);

  size_t class_offset = mirror::Object::ClassOffset().SizeValue();
  // /* HeapReference<Class> */ temp = receiver->klass_
  __ movl(temp, Address(CpuRegister(receiver), class_offset));
  MaybeRecordImplicitNullCheck(invoke);
  // Instead of simply (possibly) unpoisoning `temp` here, we should
  // emit a read barrier for the previous class reference load.
  // However this is not required in practice, as this is an
  // intermediate/temporary reference and because the current
  // concurrent copying collector keeps the from-space memory
  // intact/accessible until the end of the marking phase (the
  // concurrent copying collector may not in the future).
  __ MaybeUnpoisonHeapReference(temp);
  // temp = temp->GetMethodAt(method_offset);
  __ movq(temp, Address(temp, method_offset));
  // call temp->GetEntryPoint();
  __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(
      kX86_64PointerSize).SizeValue()));
  RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
}

void CodeGeneratorX86_64::RecordBootImageIntrinsicPatch(uint32_t intrinsic_data) {
  boot_image_intrinsic_patches_.emplace_back(/* target_dex_file= */ nullptr, intrinsic_data);
  __ Bind(&boot_image_intrinsic_patches_.back().label);
}

void CodeGeneratorX86_64::RecordBootImageRelRoPatch(uint32_t boot_image_offset) {
  boot_image_method_patches_.emplace_back(/* target_dex_file= */ nullptr, boot_image_offset);
  __ Bind(&boot_image_method_patches_.back().label);
}

void CodeGeneratorX86_64::RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke) {
  boot_image_method_patches_.emplace_back(
      invoke->GetTargetMethod().dex_file, invoke->GetTargetMethod().index);
  __ Bind(&boot_image_method_patches_.back().label);
}

void CodeGeneratorX86_64::RecordMethodBssEntryPatch(HInvokeStaticOrDirect* invoke) {
  method_bss_entry_patches_.emplace_back(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex());
  __ Bind(&method_bss_entry_patches_.back().label);
}

void CodeGeneratorX86_64::RecordBootImageTypePatch(HLoadClass* load_class) {
  boot_image_type_patches_.emplace_back(
      &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
  __ Bind(&boot_image_type_patches_.back().label);
}

Label* CodeGeneratorX86_64::NewTypeBssEntryPatch(HLoadClass* load_class) {
  type_bss_entry_patches_.emplace_back(
      &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
  return &type_bss_entry_patches_.back().label;
}

void CodeGeneratorX86_64::RecordBootImageStringPatch(HLoadString* load_string) {
  boot_image_string_patches_.emplace_back(
      &load_string->GetDexFile(), load_string->GetStringIndex().index_);
  __ Bind(&boot_image_string_patches_.back().label);
}

Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) {
  string_bss_entry_patches_.emplace_back(
      &load_string->GetDexFile(), load_string->GetStringIndex().index_);
  return &string_bss_entry_patches_.back().label;
}

void CodeGeneratorX86_64::LoadBootImageAddress(CpuRegister reg, uint32_t boot_image_reference) {
  if (GetCompilerOptions().IsBootImage()) {
    __ leal(reg, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false));
    RecordBootImageIntrinsicPatch(boot_image_reference);
  } else if (GetCompilerOptions().GetCompilePic()) {
    __ movl(reg, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false));
    RecordBootImageRelRoPatch(boot_image_reference);
  } else {
    DCHECK(Runtime::Current()->UseJitCompilation());
    gc::Heap* heap = Runtime::Current()->GetHeap();
    DCHECK(!heap->GetBootImageSpaces().empty());
    const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference;
    __ movl(reg, Immediate(dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(address))));
  }
}

void CodeGeneratorX86_64::AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke,
                                                       uint32_t boot_image_offset) {
  DCHECK(invoke->IsStatic());
  InvokeRuntimeCallingConvention calling_convention;
  CpuRegister argument = CpuRegister(calling_convention.GetRegisterAt(0));
  if (GetCompilerOptions().IsBootImage()) {
    DCHECK_EQ(boot_image_offset, IntrinsicVisitor::IntegerValueOfInfo::kInvalidReference);
    // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
    __ leal(argument,
            Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false));
    MethodReference target_method = invoke->GetTargetMethod();
    dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
    boot_image_type_patches_.emplace_back(target_method.dex_file, type_idx.index_);
    __ Bind(&boot_image_type_patches_.back().label);
  } else {
    LoadBootImageAddress(argument, boot_image_offset);
  }
  InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
  CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
}

// The label points to the end of the "movl" or another instruction but the literal offset
// for method patch needs to point to the embedded constant which occupies the last 4 bytes.
constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;

template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
inline void CodeGeneratorX86_64::EmitPcRelativeLinkerPatches(
    const ArenaDeque<PatchInfo<Label>>& infos,
    ArenaVector<linker::LinkerPatch>* linker_patches) {
  for (const PatchInfo<Label>& info : infos) {
    uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
    linker_patches->push_back(
        Factory(literal_offset, info.target_dex_file, info.label.Position(), info.offset_or_index));
  }
}

template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
                                     const DexFile* target_dex_file,
                                     uint32_t pc_insn_offset,
                                     uint32_t boot_image_offset) {
  DCHECK(target_dex_file == nullptr);  // Unused for these patches, should be null.
  return Factory(literal_offset, pc_insn_offset, boot_image_offset);
}

void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
  DCHECK(linker_patches->empty());
  size_t size =
      boot_image_method_patches_.size() +
      method_bss_entry_patches_.size() +
      boot_image_type_patches_.size() +
      type_bss_entry_patches_.size() +
      boot_image_string_patches_.size() +
      string_bss_entry_patches_.size() +
      boot_image_intrinsic_patches_.size();
  linker_patches->reserve(size);
  if (GetCompilerOptions().IsBootImage()) {
    EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
        boot_image_method_patches_, linker_patches);
    EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
        boot_image_type_patches_, linker_patches);
    EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
        boot_image_string_patches_, linker_patches);
    EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
        boot_image_intrinsic_patches_, linker_patches);
  } else {
    EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>(
        boot_image_method_patches_, linker_patches);
    DCHECK(boot_image_type_patches_.empty());
    DCHECK(boot_image_string_patches_.empty());
    DCHECK(boot_image_intrinsic_patches_.empty());
  }
  EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
      method_bss_entry_patches_, linker_patches);
  EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
      type_bss_entry_patches_, linker_patches);
  EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
      string_bss_entry_patches_, linker_patches);
  DCHECK_EQ(size, linker_patches->size());
}

void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const {
  stream << Register(reg);
}

void CodeGeneratorX86_64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
  stream << FloatRegister(reg);
}

const X86_64InstructionSetFeatures& CodeGeneratorX86_64::GetInstructionSetFeatures() const {
  return *GetCompilerOptions().GetInstructionSetFeatures()->AsX86_64InstructionSetFeatures();
}

size_t CodeGeneratorX86_64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
  __ movq(Address(CpuRegister(RSP), stack_index), CpuRegister(reg_id));
  return kX86_64WordSize;
}

size_t CodeGeneratorX86_64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
  __ movq(CpuRegister(reg_id), Address(CpuRegister(RSP), stack_index));
  return kX86_64WordSize;
}

size_t CodeGeneratorX86_64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
  if (GetGraph()->HasSIMD()) {
    __ movups(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
  } else {
    __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
  }
  return GetFloatingPointSpillSlotSize();
}

size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
  if (GetGraph()->HasSIMD()) {
    __ movups(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
  } else {
    __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
  }
  return GetFloatingPointSpillSlotSize();
}

void CodeGeneratorX86_64::InvokeRuntime(QuickEntrypointEnum entrypoint,
                                        HInstruction* instruction,
                                        uint32_t dex_pc,
                                        SlowPathCode* slow_path) {
  ValidateInvokeRuntime(entrypoint, instruction, slow_path);
  GenerateInvokeRuntime(GetThreadOffset<kX86_64PointerSize>(entrypoint).Int32Value());
  if (EntrypointRequiresStackMap(entrypoint)) {
    RecordPcInfo(instruction, dex_pc, slow_path);
  }
}

void CodeGeneratorX86_64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
                                                              HInstruction* instruction,
                                                              SlowPathCode* slow_path) {
  ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
  GenerateInvokeRuntime(entry_point_offset);
}

void CodeGeneratorX86_64::GenerateInvokeRuntime(int32_t entry_point_offset) {
  __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip= */ true));
}

static constexpr int kNumberOfCpuRegisterPairs = 0;
// Use a fake return address register to mimic Quick.
static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1);
CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
                                         const CompilerOptions& compiler_options,
                                         OptimizingCompilerStats* stats)
    : CodeGenerator(graph,
                    kNumberOfCpuRegisters,
                    kNumberOfFloatRegisters,
                    kNumberOfCpuRegisterPairs,
                    ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
                                        arraysize(kCoreCalleeSaves))
                        | (1 << kFakeReturnRegister),
                    ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves),
                                        arraysize(kFpuCalleeSaves)),
                    compiler_options,
                    stats),
      block_labels_(nullptr),
      location_builder_(graph, this),
      instruction_visitor_(graph, this),
      move_resolver_(graph->GetAllocator(), this),
      assembler_(graph->GetAllocator()),
      constant_area_start_(0),
      boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      boot_image_intrinsic_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      fixups_to_jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
  AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
}

InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph,
                                                               CodeGeneratorX86_64* codegen)
    : InstructionCodeGenerator(graph, codegen),
      assembler_(codegen->GetAssembler()),
      codegen_(codegen) {}

void CodeGeneratorX86_64::SetupBlockedRegisters() const {
  // Stack register is always reserved.
  blocked_core_registers_[RSP] = true;

  // Block the register used as TMP.
  blocked_core_registers_[TMP] = true;
}

static dwarf::Reg DWARFReg(Register reg) {
  return dwarf::Reg::X86_64Core(static_cast<int>(reg));
}

static dwarf::Reg DWARFReg(FloatRegister reg) {
  return dwarf::Reg::X86_64Fp(static_cast<int>(reg));
}

void CodeGeneratorX86_64::GenerateFrameEntry() {
  __ cfi().SetCurrentCFAOffset(kX86_64WordSize);  // return address
  __ Bind(&frame_entry_label_);
  bool skip_overflow_check = IsLeafMethod()
      && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
  DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());

  if (GetCompilerOptions().CountHotnessInCompiledCode()) {
    __ addw(Address(CpuRegister(kMethodRegisterArgument),
                    ArtMethod::HotnessCountOffset().Int32Value()),
            Immediate(1));
  }

  if (!skip_overflow_check) {
    size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86_64);
    __ testq(CpuRegister(RAX), Address(CpuRegister(RSP), -static_cast<int32_t>(reserved_bytes)));
    RecordPcInfo(nullptr, 0);
  }

  if (HasEmptyFrame()) {
    return;
  }

  for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
    Register reg = kCoreCalleeSaves[i];
    if (allocated_registers_.ContainsCoreRegister(reg)) {
      __ pushq(CpuRegister(reg));
      __ cfi().AdjustCFAOffset(kX86_64WordSize);
      __ cfi().RelOffset(DWARFReg(reg), 0);
    }
  }

  int adjust = GetFrameSize() - GetCoreSpillSize();
  __ subq(CpuRegister(RSP), Immediate(adjust));
  __ cfi().AdjustCFAOffset(adjust);
  uint32_t xmm_spill_location = GetFpuSpillStart();
  size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize();

  for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
    if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
      int offset = xmm_spill_location + (xmm_spill_slot_size * i);
      __ movsd(Address(CpuRegister(RSP), offset), XmmRegister(kFpuCalleeSaves[i]));
      __ cfi().RelOffset(DWARFReg(kFpuCalleeSaves[i]), offset);
    }
  }

  // Save the current method if we need it. Note that we do not
  // do this in HCurrentMethod, as the instruction might have been removed
  // in the SSA graph.
  if (RequiresCurrentMethod()) {
    __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset),
            CpuRegister(kMethodRegisterArgument));
  }

  if (GetGraph()->HasShouldDeoptimizeFlag()) {
    // Initialize should_deoptimize flag to 0.
1396 __ movl(Address(CpuRegister(RSP), GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0)); 1397 } 1398 } 1399 1400 void CodeGeneratorX86_64::GenerateFrameExit() { 1401 __ cfi().RememberState(); 1402 if (!HasEmptyFrame()) { 1403 uint32_t xmm_spill_location = GetFpuSpillStart(); 1404 size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize(); 1405 for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) { 1406 if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) { 1407 int offset = xmm_spill_location + (xmm_spill_slot_size * i); 1408 __ movsd(XmmRegister(kFpuCalleeSaves[i]), Address(CpuRegister(RSP), offset)); 1409 __ cfi().Restore(DWARFReg(kFpuCalleeSaves[i])); 1410 } 1411 } 1412 1413 int adjust = GetFrameSize() - GetCoreSpillSize(); 1414 __ addq(CpuRegister(RSP), Immediate(adjust)); 1415 __ cfi().AdjustCFAOffset(-adjust); 1416 1417 for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) { 1418 Register reg = kCoreCalleeSaves[i]; 1419 if (allocated_registers_.ContainsCoreRegister(reg)) { 1420 __ popq(CpuRegister(reg)); 1421 __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize)); 1422 __ cfi().Restore(DWARFReg(reg)); 1423 } 1424 } 1425 } 1426 __ ret(); 1427 __ cfi().RestoreState(); 1428 __ cfi().DefCFAOffset(GetFrameSize()); 1429 } 1430 1431 void CodeGeneratorX86_64::Bind(HBasicBlock* block) { 1432 __ Bind(GetLabelOf(block)); 1433 } 1434 1435 void CodeGeneratorX86_64::Move(Location destination, Location source) { 1436 if (source.Equals(destination)) { 1437 return; 1438 } 1439 if (destination.IsRegister()) { 1440 CpuRegister dest = destination.AsRegister<CpuRegister>(); 1441 if (source.IsRegister()) { 1442 __ movq(dest, source.AsRegister<CpuRegister>()); 1443 } else if (source.IsFpuRegister()) { 1444 __ movd(dest, source.AsFpuRegister<XmmRegister>()); 1445 } else if (source.IsStackSlot()) { 1446 __ movl(dest, Address(CpuRegister(RSP), source.GetStackIndex())); 1447 } else if (source.IsConstant()) { 1448 HConstant* constant = source.GetConstant(); 1449 if (constant->IsLongConstant()) { 1450 Load64BitValue(dest, constant->AsLongConstant()->GetValue()); 1451 } else { 1452 Load32BitValue(dest, GetInt32ValueOf(constant)); 1453 } 1454 } else { 1455 DCHECK(source.IsDoubleStackSlot()); 1456 __ movq(dest, Address(CpuRegister(RSP), source.GetStackIndex())); 1457 } 1458 } else if (destination.IsFpuRegister()) { 1459 XmmRegister dest = destination.AsFpuRegister<XmmRegister>(); 1460 if (source.IsRegister()) { 1461 __ movd(dest, source.AsRegister<CpuRegister>()); 1462 } else if (source.IsFpuRegister()) { 1463 __ movaps(dest, source.AsFpuRegister<XmmRegister>()); 1464 } else if (source.IsConstant()) { 1465 HConstant* constant = source.GetConstant(); 1466 int64_t value = CodeGenerator::GetInt64ValueOf(constant); 1467 if (constant->IsFloatConstant()) { 1468 Load32BitValue(dest, static_cast<int32_t>(value)); 1469 } else { 1470 Load64BitValue(dest, value); 1471 } 1472 } else if (source.IsStackSlot()) { 1473 __ movss(dest, Address(CpuRegister(RSP), source.GetStackIndex())); 1474 } else { 1475 DCHECK(source.IsDoubleStackSlot()); 1476 __ movsd(dest, Address(CpuRegister(RSP), source.GetStackIndex())); 1477 } 1478 } else if (destination.IsStackSlot()) { 1479 if (source.IsRegister()) { 1480 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), 1481 source.AsRegister<CpuRegister>()); 1482 } else if (source.IsFpuRegister()) { 1483 __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()), 1484 source.AsFpuRegister<XmmRegister>()); 1485 } else if (source.IsConstant()) { 
1486 HConstant* constant = source.GetConstant(); 1487 int32_t value = GetInt32ValueOf(constant); 1488 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value)); 1489 } else { 1490 DCHECK(source.IsStackSlot()) << source; 1491 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex())); 1492 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); 1493 } 1494 } else { 1495 DCHECK(destination.IsDoubleStackSlot()); 1496 if (source.IsRegister()) { 1497 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), 1498 source.AsRegister<CpuRegister>()); 1499 } else if (source.IsFpuRegister()) { 1500 __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()), 1501 source.AsFpuRegister<XmmRegister>()); 1502 } else if (source.IsConstant()) { 1503 HConstant* constant = source.GetConstant(); 1504 DCHECK(constant->IsLongConstant() || constant->IsDoubleConstant()); 1505 int64_t value = GetInt64ValueOf(constant); 1506 Store64BitValueToStack(destination, value); 1507 } else { 1508 DCHECK(source.IsDoubleStackSlot()); 1509 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex())); 1510 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); 1511 } 1512 } 1513 } 1514 1515 void CodeGeneratorX86_64::MoveConstant(Location location, int32_t value) { 1516 DCHECK(location.IsRegister()); 1517 Load64BitValue(location.AsRegister<CpuRegister>(), static_cast<int64_t>(value)); 1518 } 1519 1520 void CodeGeneratorX86_64::MoveLocation( 1521 Location dst, Location src, DataType::Type dst_type ATTRIBUTE_UNUSED) { 1522 Move(dst, src); 1523 } 1524 1525 void CodeGeneratorX86_64::AddLocationAsTemp(Location location, LocationSummary* locations) { 1526 if (location.IsRegister()) { 1527 locations->AddTemp(location); 1528 } else { 1529 UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location; 1530 } 1531 } 1532 1533 void InstructionCodeGeneratorX86_64::HandleGoto(HInstruction* got, HBasicBlock* successor) { 1534 if (successor->IsExitBlock()) { 1535 DCHECK(got->GetPrevious()->AlwaysThrows()); 1536 return; // no code needed 1537 } 1538 1539 HBasicBlock* block = got->GetBlock(); 1540 HInstruction* previous = got->GetPrevious(); 1541 1542 HLoopInformation* info = block->GetLoopInformation(); 1543 if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) { 1544 if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) { 1545 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), 0)); 1546 __ addw(Address(CpuRegister(TMP), ArtMethod::HotnessCountOffset().Int32Value()), 1547 Immediate(1)); 1548 } 1549 GenerateSuspendCheck(info->GetSuspendCheck(), successor); 1550 return; 1551 } 1552 1553 if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) { 1554 GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr); 1555 } 1556 if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) { 1557 __ jmp(codegen_->GetLabelOf(successor)); 1558 } 1559 } 1560 1561 void LocationsBuilderX86_64::VisitGoto(HGoto* got) { 1562 got->SetLocations(nullptr); 1563 } 1564 1565 void InstructionCodeGeneratorX86_64::VisitGoto(HGoto* got) { 1566 HandleGoto(got, got->GetSuccessor()); 1567 } 1568 1569 void LocationsBuilderX86_64::VisitTryBoundary(HTryBoundary* try_boundary) { 1570 try_boundary->SetLocations(nullptr); 1571 } 1572 1573 void InstructionCodeGeneratorX86_64::VisitTryBoundary(HTryBoundary* try_boundary) { 1574 HBasicBlock* successor = 
try_boundary->GetNormalFlowSuccessor(); 1575 if (!successor->IsExitBlock()) { 1576 HandleGoto(try_boundary, successor); 1577 } 1578 } 1579 1580 void LocationsBuilderX86_64::VisitExit(HExit* exit) { 1581 exit->SetLocations(nullptr); 1582 } 1583 1584 void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) { 1585 } 1586 1587 template<class LabelType> 1588 void InstructionCodeGeneratorX86_64::GenerateFPJumps(HCondition* cond, 1589 LabelType* true_label, 1590 LabelType* false_label) { 1591 if (cond->IsFPConditionTrueIfNaN()) { 1592 __ j(kUnordered, true_label); 1593 } else if (cond->IsFPConditionFalseIfNaN()) { 1594 __ j(kUnordered, false_label); 1595 } 1596 __ j(X86_64FPCondition(cond->GetCondition()), true_label); 1597 } 1598 1599 void InstructionCodeGeneratorX86_64::GenerateCompareTest(HCondition* condition) { 1600 LocationSummary* locations = condition->GetLocations(); 1601 1602 Location left = locations->InAt(0); 1603 Location right = locations->InAt(1); 1604 DataType::Type type = condition->InputAt(0)->GetType(); 1605 switch (type) { 1606 case DataType::Type::kBool: 1607 case DataType::Type::kUint8: 1608 case DataType::Type::kInt8: 1609 case DataType::Type::kUint16: 1610 case DataType::Type::kInt16: 1611 case DataType::Type::kInt32: 1612 case DataType::Type::kReference: { 1613 codegen_->GenerateIntCompare(left, right); 1614 break; 1615 } 1616 case DataType::Type::kInt64: { 1617 codegen_->GenerateLongCompare(left, right); 1618 break; 1619 } 1620 case DataType::Type::kFloat32: { 1621 if (right.IsFpuRegister()) { 1622 __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>()); 1623 } else if (right.IsConstant()) { 1624 __ ucomiss(left.AsFpuRegister<XmmRegister>(), 1625 codegen_->LiteralFloatAddress( 1626 right.GetConstant()->AsFloatConstant()->GetValue())); 1627 } else { 1628 DCHECK(right.IsStackSlot()); 1629 __ ucomiss(left.AsFpuRegister<XmmRegister>(), 1630 Address(CpuRegister(RSP), right.GetStackIndex())); 1631 } 1632 break; 1633 } 1634 case DataType::Type::kFloat64: { 1635 if (right.IsFpuRegister()) { 1636 __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>()); 1637 } else if (right.IsConstant()) { 1638 __ ucomisd(left.AsFpuRegister<XmmRegister>(), 1639 codegen_->LiteralDoubleAddress( 1640 right.GetConstant()->AsDoubleConstant()->GetValue())); 1641 } else { 1642 DCHECK(right.IsDoubleStackSlot()); 1643 __ ucomisd(left.AsFpuRegister<XmmRegister>(), 1644 Address(CpuRegister(RSP), right.GetStackIndex())); 1645 } 1646 break; 1647 } 1648 default: 1649 LOG(FATAL) << "Unexpected condition type " << type; 1650 } 1651 } 1652 1653 template<class LabelType> 1654 void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* condition, 1655 LabelType* true_target_in, 1656 LabelType* false_target_in) { 1657 // Generated branching requires both targets to be explicit. If either of the 1658 // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead. 1659 LabelType fallthrough_target; 1660 LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in; 1661 LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in; 1662 1663 // Generate the comparison to set the CC. 1664 GenerateCompareTest(condition); 1665 1666 // Now generate the correct jump(s). 
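// (Illustrative sketch of the FP case handled below, assuming both targets are
// explicit; e.g. for a gt-bias float '<', where NaN makes the condition false:
//   ucomiss xmm_lhs, xmm_rhs
//   jp   false_label      ; unordered (NaN)
//   jb   true_label       ; ucomiss reports "less" through CF
//   jmp  false_label
// When either target is nullptr, the bound fallthrough_target stands in for it.)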
1667 DataType::Type type = condition->InputAt(0)->GetType(); 1668 switch (type) { 1669 case DataType::Type::kInt64: { 1670 __ j(X86_64IntegerCondition(condition->GetCondition()), true_target); 1671 break; 1672 } 1673 case DataType::Type::kFloat32: { 1674 GenerateFPJumps(condition, true_target, false_target); 1675 break; 1676 } 1677 case DataType::Type::kFloat64: { 1678 GenerateFPJumps(condition, true_target, false_target); 1679 break; 1680 } 1681 default: 1682 LOG(FATAL) << "Unexpected condition type " << type; 1683 } 1684 1685 if (false_target != &fallthrough_target) { 1686 __ jmp(false_target); 1687 } 1688 1689 if (fallthrough_target.IsLinked()) { 1690 __ Bind(&fallthrough_target); 1691 } 1692 } 1693 1694 static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) { 1695 // Moves may affect the eflags register (move zero uses xorl), so the EFLAGS 1696 // are set only strictly before `branch`. We can't use the eflags on long 1697 // conditions if they are materialized due to the complex branching. 1698 return cond->IsCondition() && 1699 cond->GetNext() == branch && 1700 !DataType::IsFloatingPointType(cond->InputAt(0)->GetType()); 1701 } 1702 1703 template<class LabelType> 1704 void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruction, 1705 size_t condition_input_index, 1706 LabelType* true_target, 1707 LabelType* false_target) { 1708 HInstruction* cond = instruction->InputAt(condition_input_index); 1709 1710 if (true_target == nullptr && false_target == nullptr) { 1711 // Nothing to do. The code always falls through. 1712 return; 1713 } else if (cond->IsIntConstant()) { 1714 // Constant condition, statically compared against "true" (integer value 1). 1715 if (cond->AsIntConstant()->IsTrue()) { 1716 if (true_target != nullptr) { 1717 __ jmp(true_target); 1718 } 1719 } else { 1720 DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue(); 1721 if (false_target != nullptr) { 1722 __ jmp(false_target); 1723 } 1724 } 1725 return; 1726 } 1727 1728 // The following code generates these patterns: 1729 // (1) true_target == nullptr && false_target != nullptr 1730 // - opposite condition true => branch to false_target 1731 // (2) true_target != nullptr && false_target == nullptr 1732 // - condition true => branch to true_target 1733 // (3) true_target != nullptr && false_target != nullptr 1734 // - condition true => branch to true_target 1735 // - branch to false_target 1736 if (IsBooleanValueOrMaterializedCondition(cond)) { 1737 if (AreEflagsSetFrom(cond, instruction)) { 1738 if (true_target == nullptr) { 1739 __ j(X86_64IntegerCondition(cond->AsCondition()->GetOppositeCondition()), false_target); 1740 } else { 1741 __ j(X86_64IntegerCondition(cond->AsCondition()->GetCondition()), true_target); 1742 } 1743 } else { 1744 // Materialized condition, compare against 0. 1745 Location lhs = instruction->GetLocations()->InAt(condition_input_index); 1746 if (lhs.IsRegister()) { 1747 __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>()); 1748 } else { 1749 __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0)); 1750 } 1751 if (true_target == nullptr) { 1752 __ j(kEqual, false_target); 1753 } else { 1754 __ j(kNotEqual, true_target); 1755 } 1756 } 1757 } else { 1758 // Condition has not been materialized, use its inputs as the 1759 // comparison and its condition as the branch condition. 
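// (Illustrative sketch only: for an un-materialized integer condition such as
// `a < b` feeding the branch, the code below reduces to
//   cmp  lhs, rhs
//   jl   true_target      ; or the opposite jump to false_target
// while long and FP conditions are diverted to GenerateCompareTestAndBranch()
// a few lines below instead.)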
1760 HCondition* condition = cond->AsCondition(); 1761 1762 // If this is a long or FP comparison that has been folded into 1763 // the HCondition, generate the comparison directly. 1764 DataType::Type type = condition->InputAt(0)->GetType(); 1765 if (type == DataType::Type::kInt64 || DataType::IsFloatingPointType(type)) { 1766 GenerateCompareTestAndBranch(condition, true_target, false_target); 1767 return; 1768 } 1769 1770 Location lhs = condition->GetLocations()->InAt(0); 1771 Location rhs = condition->GetLocations()->InAt(1); 1772 codegen_->GenerateIntCompare(lhs, rhs); 1773 if (true_target == nullptr) { 1774 __ j(X86_64IntegerCondition(condition->GetOppositeCondition()), false_target); 1775 } else { 1776 __ j(X86_64IntegerCondition(condition->GetCondition()), true_target); 1777 } 1778 } 1779 1780 // If neither branch falls through (case 3), the conditional branch to `true_target` 1781 // was already emitted (case 2) and we need to emit a jump to `false_target`. 1782 if (true_target != nullptr && false_target != nullptr) { 1783 __ jmp(false_target); 1784 } 1785 } 1786 1787 void LocationsBuilderX86_64::VisitIf(HIf* if_instr) { 1788 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr); 1789 if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) { 1790 locations->SetInAt(0, Location::Any()); 1791 } 1792 } 1793 1794 void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) { 1795 HBasicBlock* true_successor = if_instr->IfTrueSuccessor(); 1796 HBasicBlock* false_successor = if_instr->IfFalseSuccessor(); 1797 Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ? 1798 nullptr : codegen_->GetLabelOf(true_successor); 1799 Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ? 
1800 nullptr : codegen_->GetLabelOf(false_successor); 1801 GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target); 1802 } 1803 1804 void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) { 1805 LocationSummary* locations = new (GetGraph()->GetAllocator()) 1806 LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); 1807 InvokeRuntimeCallingConvention calling_convention; 1808 RegisterSet caller_saves = RegisterSet::Empty(); 1809 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); 1810 locations->SetCustomSlowPathCallerSaves(caller_saves); 1811 if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) { 1812 locations->SetInAt(0, Location::Any()); 1813 } 1814 } 1815 1816 void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) { 1817 SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86_64>(deoptimize); 1818 GenerateTestAndBranch<Label>(deoptimize, 1819 /* condition_input_index= */ 0, 1820 slow_path->GetEntryLabel(), 1821 /* false_target= */ nullptr); 1822 } 1823 1824 void LocationsBuilderX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { 1825 LocationSummary* locations = new (GetGraph()->GetAllocator()) 1826 LocationSummary(flag, LocationSummary::kNoCall); 1827 locations->SetOut(Location::RequiresRegister()); 1828 } 1829 1830 void InstructionCodeGeneratorX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { 1831 __ movl(flag->GetLocations()->Out().AsRegister<CpuRegister>(), 1832 Address(CpuRegister(RSP), codegen_->GetStackOffsetOfShouldDeoptimizeFlag())); 1833 } 1834 1835 static bool SelectCanUseCMOV(HSelect* select) { 1836 // There are no conditional move instructions for XMMs. 1837 if (DataType::IsFloatingPointType(select->GetType())) { 1838 return false; 1839 } 1840 1841 // A FP condition doesn't generate the single CC that we need. 1842 HInstruction* condition = select->GetCondition(); 1843 if (condition->IsCondition() && 1844 DataType::IsFloatingPointType(condition->InputAt(0)->GetType())) { 1845 return false; 1846 } 1847 1848 // We can generate a CMOV for this Select. 1849 return true; 1850 } 1851 1852 void LocationsBuilderX86_64::VisitSelect(HSelect* select) { 1853 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select); 1854 if (DataType::IsFloatingPointType(select->GetType())) { 1855 locations->SetInAt(0, Location::RequiresFpuRegister()); 1856 locations->SetInAt(1, Location::Any()); 1857 } else { 1858 locations->SetInAt(0, Location::RequiresRegister()); 1859 if (SelectCanUseCMOV(select)) { 1860 if (select->InputAt(1)->IsConstant()) { 1861 locations->SetInAt(1, Location::RequiresRegister()); 1862 } else { 1863 locations->SetInAt(1, Location::Any()); 1864 } 1865 } else { 1866 locations->SetInAt(1, Location::Any()); 1867 } 1868 } 1869 if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) { 1870 locations->SetInAt(2, Location::RequiresRegister()); 1871 } 1872 locations->SetOut(Location::SameAsFirstInput()); 1873 } 1874 1875 void InstructionCodeGeneratorX86_64::VisitSelect(HSelect* select) { 1876 LocationSummary* locations = select->GetLocations(); 1877 if (SelectCanUseCMOV(select)) { 1878 // If both the condition and the source types are integer, we can generate 1879 // a CMOV to implement Select. 
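// (Illustrative sketch, not upstream text: for `out = cond ? t : f` with a
// condition already materialized in a register, the code below amounts to
//   test cond_reg, cond_reg
//   cmovne out, t          ; out already holds f (output is same-as-first-input)
// with the 64-bit form of cmov used when the select produces a 64-bit value.)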
1880 CpuRegister value_false = locations->InAt(0).AsRegister<CpuRegister>(); 1881 Location value_true_loc = locations->InAt(1); 1882 DCHECK(locations->InAt(0).Equals(locations->Out())); 1883 1884 HInstruction* select_condition = select->GetCondition(); 1885 Condition cond = kNotEqual; 1886 1887 // Figure out how to test the 'condition'. 1888 if (select_condition->IsCondition()) { 1889 HCondition* condition = select_condition->AsCondition(); 1890 if (!condition->IsEmittedAtUseSite()) { 1891 // This was a previously materialized condition. 1892 // Can we use the existing condition code? 1893 if (AreEflagsSetFrom(condition, select)) { 1894 // Materialization was the previous instruction. Condition codes are right. 1895 cond = X86_64IntegerCondition(condition->GetCondition()); 1896 } else { 1897 // No, we have to recreate the condition code. 1898 CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>(); 1899 __ testl(cond_reg, cond_reg); 1900 } 1901 } else { 1902 GenerateCompareTest(condition); 1903 cond = X86_64IntegerCondition(condition->GetCondition()); 1904 } 1905 } else { 1906 // Must be a Boolean condition, which needs to be compared to 0. 1907 CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>(); 1908 __ testl(cond_reg, cond_reg); 1909 } 1910 1911 // If the condition is true, overwrite the output, which already contains false. 1912 // Generate the correct sized CMOV. 1913 bool is_64_bit = DataType::Is64BitType(select->GetType()); 1914 if (value_true_loc.IsRegister()) { 1915 __ cmov(cond, value_false, value_true_loc.AsRegister<CpuRegister>(), is_64_bit); 1916 } else { 1917 __ cmov(cond, 1918 value_false, 1919 Address(CpuRegister(RSP), value_true_loc.GetStackIndex()), is_64_bit); 1920 } 1921 } else { 1922 NearLabel false_target; 1923 GenerateTestAndBranch<NearLabel>(select, 1924 /* condition_input_index= */ 2, 1925 /* true_target= */ nullptr, 1926 &false_target); 1927 codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType()); 1928 __ Bind(&false_target); 1929 } 1930 } 1931 1932 void LocationsBuilderX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) { 1933 new (GetGraph()->GetAllocator()) LocationSummary(info); 1934 } 1935 1936 void InstructionCodeGeneratorX86_64::VisitNativeDebugInfo(HNativeDebugInfo*) { 1937 // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile. 1938 } 1939 1940 void CodeGeneratorX86_64::GenerateNop() { 1941 __ nop(); 1942 } 1943 1944 void LocationsBuilderX86_64::HandleCondition(HCondition* cond) { 1945 LocationSummary* locations = 1946 new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall); 1947 // Handle the long/FP comparisons made in instruction simplification. 
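// (Illustrative note: when the condition is materialized, HandleCondition below
// produces a boolean in a register via setcc, e.g. for a long '==':
//   xorl reg, reg          ; setcc only writes the low byte
//   cmpq lhs, rhs
//   sete reg )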
1948 switch (cond->InputAt(0)->GetType()) { 1949 case DataType::Type::kInt64: 1950 locations->SetInAt(0, Location::RequiresRegister()); 1951 locations->SetInAt(1, Location::Any()); 1952 break; 1953 case DataType::Type::kFloat32: 1954 case DataType::Type::kFloat64: 1955 locations->SetInAt(0, Location::RequiresFpuRegister()); 1956 locations->SetInAt(1, Location::Any()); 1957 break; 1958 default: 1959 locations->SetInAt(0, Location::RequiresRegister()); 1960 locations->SetInAt(1, Location::Any()); 1961 break; 1962 } 1963 if (!cond->IsEmittedAtUseSite()) { 1964 locations->SetOut(Location::RequiresRegister()); 1965 } 1966 } 1967 1968 void InstructionCodeGeneratorX86_64::HandleCondition(HCondition* cond) { 1969 if (cond->IsEmittedAtUseSite()) { 1970 return; 1971 } 1972 1973 LocationSummary* locations = cond->GetLocations(); 1974 Location lhs = locations->InAt(0); 1975 Location rhs = locations->InAt(1); 1976 CpuRegister reg = locations->Out().AsRegister<CpuRegister>(); 1977 NearLabel true_label, false_label; 1978 1979 switch (cond->InputAt(0)->GetType()) { 1980 default: 1981 // Integer case. 1982 1983 // Clear output register: setcc only sets the low byte. 1984 __ xorl(reg, reg); 1985 1986 codegen_->GenerateIntCompare(lhs, rhs); 1987 __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg); 1988 return; 1989 case DataType::Type::kInt64: 1990 // Clear output register: setcc only sets the low byte. 1991 __ xorl(reg, reg); 1992 1993 codegen_->GenerateLongCompare(lhs, rhs); 1994 __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg); 1995 return; 1996 case DataType::Type::kFloat32: { 1997 XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>(); 1998 if (rhs.IsConstant()) { 1999 float value = rhs.GetConstant()->AsFloatConstant()->GetValue(); 2000 __ ucomiss(lhs_reg, codegen_->LiteralFloatAddress(value)); 2001 } else if (rhs.IsStackSlot()) { 2002 __ ucomiss(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex())); 2003 } else { 2004 __ ucomiss(lhs_reg, rhs.AsFpuRegister<XmmRegister>()); 2005 } 2006 GenerateFPJumps(cond, &true_label, &false_label); 2007 break; 2008 } 2009 case DataType::Type::kFloat64: { 2010 XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>(); 2011 if (rhs.IsConstant()) { 2012 double value = rhs.GetConstant()->AsDoubleConstant()->GetValue(); 2013 __ ucomisd(lhs_reg, codegen_->LiteralDoubleAddress(value)); 2014 } else if (rhs.IsDoubleStackSlot()) { 2015 __ ucomisd(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex())); 2016 } else { 2017 __ ucomisd(lhs_reg, rhs.AsFpuRegister<XmmRegister>()); 2018 } 2019 GenerateFPJumps(cond, &true_label, &false_label); 2020 break; 2021 } 2022 } 2023 2024 // Convert the jumps into the result. 2025 NearLabel done_label; 2026 2027 // False case: result = 0. 2028 __ Bind(&false_label); 2029 __ xorl(reg, reg); 2030 __ jmp(&done_label); 2031 2032 // True case: result = 1. 
2033 __ Bind(&true_label); 2034 __ movl(reg, Immediate(1)); 2035 __ Bind(&done_label); 2036 } 2037 2038 void LocationsBuilderX86_64::VisitEqual(HEqual* comp) { 2039 HandleCondition(comp); 2040 } 2041 2042 void InstructionCodeGeneratorX86_64::VisitEqual(HEqual* comp) { 2043 HandleCondition(comp); 2044 } 2045 2046 void LocationsBuilderX86_64::VisitNotEqual(HNotEqual* comp) { 2047 HandleCondition(comp); 2048 } 2049 2050 void InstructionCodeGeneratorX86_64::VisitNotEqual(HNotEqual* comp) { 2051 HandleCondition(comp); 2052 } 2053 2054 void LocationsBuilderX86_64::VisitLessThan(HLessThan* comp) { 2055 HandleCondition(comp); 2056 } 2057 2058 void InstructionCodeGeneratorX86_64::VisitLessThan(HLessThan* comp) { 2059 HandleCondition(comp); 2060 } 2061 2062 void LocationsBuilderX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) { 2063 HandleCondition(comp); 2064 } 2065 2066 void InstructionCodeGeneratorX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) { 2067 HandleCondition(comp); 2068 } 2069 2070 void LocationsBuilderX86_64::VisitGreaterThan(HGreaterThan* comp) { 2071 HandleCondition(comp); 2072 } 2073 2074 void InstructionCodeGeneratorX86_64::VisitGreaterThan(HGreaterThan* comp) { 2075 HandleCondition(comp); 2076 } 2077 2078 void LocationsBuilderX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) { 2079 HandleCondition(comp); 2080 } 2081 2082 void InstructionCodeGeneratorX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) { 2083 HandleCondition(comp); 2084 } 2085 2086 void LocationsBuilderX86_64::VisitBelow(HBelow* comp) { 2087 HandleCondition(comp); 2088 } 2089 2090 void InstructionCodeGeneratorX86_64::VisitBelow(HBelow* comp) { 2091 HandleCondition(comp); 2092 } 2093 2094 void LocationsBuilderX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) { 2095 HandleCondition(comp); 2096 } 2097 2098 void InstructionCodeGeneratorX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) { 2099 HandleCondition(comp); 2100 } 2101 2102 void LocationsBuilderX86_64::VisitAbove(HAbove* comp) { 2103 HandleCondition(comp); 2104 } 2105 2106 void InstructionCodeGeneratorX86_64::VisitAbove(HAbove* comp) { 2107 HandleCondition(comp); 2108 } 2109 2110 void LocationsBuilderX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) { 2111 HandleCondition(comp); 2112 } 2113 2114 void InstructionCodeGeneratorX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) { 2115 HandleCondition(comp); 2116 } 2117 2118 void LocationsBuilderX86_64::VisitCompare(HCompare* compare) { 2119 LocationSummary* locations = 2120 new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall); 2121 switch (compare->InputAt(0)->GetType()) { 2122 case DataType::Type::kBool: 2123 case DataType::Type::kUint8: 2124 case DataType::Type::kInt8: 2125 case DataType::Type::kUint16: 2126 case DataType::Type::kInt16: 2127 case DataType::Type::kInt32: 2128 case DataType::Type::kInt64: { 2129 locations->SetInAt(0, Location::RequiresRegister()); 2130 locations->SetInAt(1, Location::Any()); 2131 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 2132 break; 2133 } 2134 case DataType::Type::kFloat32: 2135 case DataType::Type::kFloat64: { 2136 locations->SetInAt(0, Location::RequiresFpuRegister()); 2137 locations->SetInAt(1, Location::Any()); 2138 locations->SetOut(Location::RequiresRegister()); 2139 break; 2140 } 2141 default: 2142 LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType(); 2143 } 2144 } 2145 2146 void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) { 2147 
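// (Illustrative summary, not upstream text: HCompare materializes -1, 0 or 1 in
// `out`. The integer cases use the flags from a plain compare; the FP cases first
// send NaN to the `greater` or `less` label according to the gt/lt bias, and test
// kBelow rather than kLess because ucomis{s,d} reports "below" through CF.)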
LocationSummary* locations = compare->GetLocations(); 2148 CpuRegister out = locations->Out().AsRegister<CpuRegister>(); 2149 Location left = locations->InAt(0); 2150 Location right = locations->InAt(1); 2151 2152 NearLabel less, greater, done; 2153 DataType::Type type = compare->InputAt(0)->GetType(); 2154 Condition less_cond = kLess; 2155 2156 switch (type) { 2157 case DataType::Type::kBool: 2158 case DataType::Type::kUint8: 2159 case DataType::Type::kInt8: 2160 case DataType::Type::kUint16: 2161 case DataType::Type::kInt16: 2162 case DataType::Type::kInt32: { 2163 codegen_->GenerateIntCompare(left, right); 2164 break; 2165 } 2166 case DataType::Type::kInt64: { 2167 codegen_->GenerateLongCompare(left, right); 2168 break; 2169 } 2170 case DataType::Type::kFloat32: { 2171 XmmRegister left_reg = left.AsFpuRegister<XmmRegister>(); 2172 if (right.IsConstant()) { 2173 float value = right.GetConstant()->AsFloatConstant()->GetValue(); 2174 __ ucomiss(left_reg, codegen_->LiteralFloatAddress(value)); 2175 } else if (right.IsStackSlot()) { 2176 __ ucomiss(left_reg, Address(CpuRegister(RSP), right.GetStackIndex())); 2177 } else { 2178 __ ucomiss(left_reg, right.AsFpuRegister<XmmRegister>()); 2179 } 2180 __ j(kUnordered, compare->IsGtBias() ? &greater : &less); 2181 less_cond = kBelow; // ucomis{s,d} sets CF 2182 break; 2183 } 2184 case DataType::Type::kFloat64: { 2185 XmmRegister left_reg = left.AsFpuRegister<XmmRegister>(); 2186 if (right.IsConstant()) { 2187 double value = right.GetConstant()->AsDoubleConstant()->GetValue(); 2188 __ ucomisd(left_reg, codegen_->LiteralDoubleAddress(value)); 2189 } else if (right.IsDoubleStackSlot()) { 2190 __ ucomisd(left_reg, Address(CpuRegister(RSP), right.GetStackIndex())); 2191 } else { 2192 __ ucomisd(left_reg, right.AsFpuRegister<XmmRegister>()); 2193 } 2194 __ j(kUnordered, compare->IsGtBias() ? &greater : &less); 2195 less_cond = kBelow; // ucomis{s,d} sets CF 2196 break; 2197 } 2198 default: 2199 LOG(FATAL) << "Unexpected compare type " << type; 2200 } 2201 2202 __ movl(out, Immediate(0)); 2203 __ j(kEqual, &done); 2204 __ j(less_cond, &less); 2205 2206 __ Bind(&greater); 2207 __ movl(out, Immediate(1)); 2208 __ jmp(&done); 2209 2210 __ Bind(&less); 2211 __ movl(out, Immediate(-1)); 2212 2213 __ Bind(&done); 2214 } 2215 2216 void LocationsBuilderX86_64::VisitIntConstant(HIntConstant* constant) { 2217 LocationSummary* locations = 2218 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); 2219 locations->SetOut(Location::ConstantLocation(constant)); 2220 } 2221 2222 void InstructionCodeGeneratorX86_64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) { 2223 // Will be generated at use site. 2224 } 2225 2226 void LocationsBuilderX86_64::VisitNullConstant(HNullConstant* constant) { 2227 LocationSummary* locations = 2228 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); 2229 locations->SetOut(Location::ConstantLocation(constant)); 2230 } 2231 2232 void InstructionCodeGeneratorX86_64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) { 2233 // Will be generated at use site. 
2234 } 2235 2236 void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) { 2237 LocationSummary* locations = 2238 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); 2239 locations->SetOut(Location::ConstantLocation(constant)); 2240 } 2241 2242 void InstructionCodeGeneratorX86_64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) { 2243 // Will be generated at use site. 2244 } 2245 2246 void LocationsBuilderX86_64::VisitFloatConstant(HFloatConstant* constant) { 2247 LocationSummary* locations = 2248 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); 2249 locations->SetOut(Location::ConstantLocation(constant)); 2250 } 2251 2252 void InstructionCodeGeneratorX86_64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) { 2253 // Will be generated at use site. 2254 } 2255 2256 void LocationsBuilderX86_64::VisitDoubleConstant(HDoubleConstant* constant) { 2257 LocationSummary* locations = 2258 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); 2259 locations->SetOut(Location::ConstantLocation(constant)); 2260 } 2261 2262 void InstructionCodeGeneratorX86_64::VisitDoubleConstant( 2263 HDoubleConstant* constant ATTRIBUTE_UNUSED) { 2264 // Will be generated at use site. 2265 } 2266 2267 void LocationsBuilderX86_64::VisitConstructorFence(HConstructorFence* constructor_fence) { 2268 constructor_fence->SetLocations(nullptr); 2269 } 2270 2271 void InstructionCodeGeneratorX86_64::VisitConstructorFence( 2272 HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) { 2273 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); 2274 } 2275 2276 void LocationsBuilderX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { 2277 memory_barrier->SetLocations(nullptr); 2278 } 2279 2280 void InstructionCodeGeneratorX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { 2281 codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind()); 2282 } 2283 2284 void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) { 2285 ret->SetLocations(nullptr); 2286 } 2287 2288 void InstructionCodeGeneratorX86_64::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) { 2289 codegen_->GenerateFrameExit(); 2290 } 2291 2292 void LocationsBuilderX86_64::VisitReturn(HReturn* ret) { 2293 LocationSummary* locations = 2294 new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall); 2295 switch (ret->InputAt(0)->GetType()) { 2296 case DataType::Type::kReference: 2297 case DataType::Type::kBool: 2298 case DataType::Type::kUint8: 2299 case DataType::Type::kInt8: 2300 case DataType::Type::kUint16: 2301 case DataType::Type::kInt16: 2302 case DataType::Type::kInt32: 2303 case DataType::Type::kInt64: 2304 locations->SetInAt(0, Location::RegisterLocation(RAX)); 2305 break; 2306 2307 case DataType::Type::kFloat32: 2308 case DataType::Type::kFloat64: 2309 locations->SetInAt(0, Location::FpuRegisterLocation(XMM0)); 2310 break; 2311 2312 default: 2313 LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType(); 2314 } 2315 } 2316 2317 void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) { 2318 if (kIsDebugBuild) { 2319 switch (ret->InputAt(0)->GetType()) { 2320 case DataType::Type::kReference: 2321 case DataType::Type::kBool: 2322 case DataType::Type::kUint8: 2323 case DataType::Type::kInt8: 2324 case DataType::Type::kUint16: 2325 case DataType::Type::kInt16: 2326 case DataType::Type::kInt32: 2327 case DataType::Type::kInt64: 2328 
DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<CpuRegister>().AsRegister(), RAX); 2329 break; 2330 2331 case DataType::Type::kFloat32: 2332 case DataType::Type::kFloat64: 2333 DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(), 2334 XMM0); 2335 break; 2336 2337 default: 2338 LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType(); 2339 } 2340 } 2341 codegen_->GenerateFrameExit(); 2342 } 2343 2344 Location InvokeDexCallingConventionVisitorX86_64::GetReturnLocation(DataType::Type type) const { 2345 switch (type) { 2346 case DataType::Type::kReference: 2347 case DataType::Type::kBool: 2348 case DataType::Type::kUint8: 2349 case DataType::Type::kInt8: 2350 case DataType::Type::kUint16: 2351 case DataType::Type::kInt16: 2352 case DataType::Type::kUint32: 2353 case DataType::Type::kInt32: 2354 case DataType::Type::kUint64: 2355 case DataType::Type::kInt64: 2356 return Location::RegisterLocation(RAX); 2357 2358 case DataType::Type::kVoid: 2359 return Location::NoLocation(); 2360 2361 case DataType::Type::kFloat64: 2362 case DataType::Type::kFloat32: 2363 return Location::FpuRegisterLocation(XMM0); 2364 } 2365 2366 UNREACHABLE(); 2367 } 2368 2369 Location InvokeDexCallingConventionVisitorX86_64::GetMethodLocation() const { 2370 return Location::RegisterLocation(kMethodRegisterArgument); 2371 } 2372 2373 Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(DataType::Type type) { 2374 switch (type) { 2375 case DataType::Type::kReference: 2376 case DataType::Type::kBool: 2377 case DataType::Type::kUint8: 2378 case DataType::Type::kInt8: 2379 case DataType::Type::kUint16: 2380 case DataType::Type::kInt16: 2381 case DataType::Type::kInt32: { 2382 uint32_t index = gp_index_++; 2383 stack_index_++; 2384 if (index < calling_convention.GetNumberOfRegisters()) { 2385 return Location::RegisterLocation(calling_convention.GetRegisterAt(index)); 2386 } else { 2387 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1)); 2388 } 2389 } 2390 2391 case DataType::Type::kInt64: { 2392 uint32_t index = gp_index_; 2393 stack_index_ += 2; 2394 if (index < calling_convention.GetNumberOfRegisters()) { 2395 gp_index_ += 1; 2396 return Location::RegisterLocation(calling_convention.GetRegisterAt(index)); 2397 } else { 2398 gp_index_ += 2; 2399 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2)); 2400 } 2401 } 2402 2403 case DataType::Type::kFloat32: { 2404 uint32_t index = float_index_++; 2405 stack_index_++; 2406 if (index < calling_convention.GetNumberOfFpuRegisters()) { 2407 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index)); 2408 } else { 2409 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1)); 2410 } 2411 } 2412 2413 case DataType::Type::kFloat64: { 2414 uint32_t index = float_index_++; 2415 stack_index_ += 2; 2416 if (index < calling_convention.GetNumberOfFpuRegisters()) { 2417 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index)); 2418 } else { 2419 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2)); 2420 } 2421 } 2422 2423 case DataType::Type::kUint32: 2424 case DataType::Type::kUint64: 2425 case DataType::Type::kVoid: 2426 LOG(FATAL) << "Unexpected parameter type " << type; 2427 UNREACHABLE(); 2428 } 2429 return Location::NoLocation(); 2430 } 2431 2432 void LocationsBuilderX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) { 2433 // The trampoline uses the same 
calling convention as dex calling conventions, 2434 // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain 2435 // the method_idx. 2436 HandleInvoke(invoke); 2437 } 2438 2439 void InstructionCodeGeneratorX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) { 2440 codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke); 2441 } 2442 2443 void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { 2444 // Explicit clinit checks triggered by static invokes must have been pruned by 2445 // art::PrepareForRegisterAllocation. 2446 DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); 2447 2448 IntrinsicLocationsBuilderX86_64 intrinsic(codegen_); 2449 if (intrinsic.TryDispatch(invoke)) { 2450 return; 2451 } 2452 2453 HandleInvoke(invoke); 2454 } 2455 2456 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86_64* codegen) { 2457 if (invoke->GetLocations()->Intrinsified()) { 2458 IntrinsicCodeGeneratorX86_64 intrinsic(codegen); 2459 intrinsic.Dispatch(invoke); 2460 return true; 2461 } 2462 return false; 2463 } 2464 2465 void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { 2466 // Explicit clinit checks triggered by static invokes must have been pruned by 2467 // art::PrepareForRegisterAllocation. 2468 DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); 2469 2470 if (TryGenerateIntrinsicCode(invoke, codegen_)) { 2471 return; 2472 } 2473 2474 LocationSummary* locations = invoke->GetLocations(); 2475 codegen_->GenerateStaticOrDirectCall( 2476 invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation()); 2477 } 2478 2479 void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) { 2480 InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor; 2481 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor); 2482 } 2483 2484 void LocationsBuilderX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) { 2485 IntrinsicLocationsBuilderX86_64 intrinsic(codegen_); 2486 if (intrinsic.TryDispatch(invoke)) { 2487 return; 2488 } 2489 2490 HandleInvoke(invoke); 2491 } 2492 2493 void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) { 2494 if (TryGenerateIntrinsicCode(invoke, codegen_)) { 2495 return; 2496 } 2497 2498 codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); 2499 DCHECK(!codegen_->IsLeafMethod()); 2500 } 2501 2502 void LocationsBuilderX86_64::VisitInvokeInterface(HInvokeInterface* invoke) { 2503 HandleInvoke(invoke); 2504 // Add the hidden argument. 2505 invoke->GetLocations()->AddTemp(Location::RegisterLocation(RAX)); 2506 } 2507 2508 void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invoke) { 2509 // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError. 2510 LocationSummary* locations = invoke->GetLocations(); 2511 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); 2512 CpuRegister hidden_reg = locations->GetTemp(1).AsRegister<CpuRegister>(); 2513 Location receiver = locations->InAt(0); 2514 size_t class_offset = mirror::Object::ClassOffset().SizeValue(); 2515 2516 // Set the hidden argument. This is safe to do this here, as RAX 2517 // won't be modified thereafter, before the `call` instruction. 
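// (Illustrative sketch of the dispatch emitted below; offsets and names are
// symbolic, not real values:
//   mov   rax, method_idx                       ; hidden argument
//   movl  temp, [receiver + class_offset]       ; receiver->klass_
//   movq  temp, [temp + imt_ptr_offset]         ; klass->GetAddressOfIMT()
//   movq  temp, [temp + imt_entry_offset]       ; ImTable entry
//   call  [temp + quick_entry_point_offset]
// with the class reference unpoisoned in between when heap poisoning is enabled.)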
2518 DCHECK_EQ(RAX, hidden_reg.AsRegister()); 2519 codegen_->Load64BitValue(hidden_reg, invoke->GetDexMethodIndex()); 2520 2521 if (receiver.IsStackSlot()) { 2522 __ movl(temp, Address(CpuRegister(RSP), receiver.GetStackIndex())); 2523 // /* HeapReference<Class> */ temp = temp->klass_ 2524 __ movl(temp, Address(temp, class_offset)); 2525 } else { 2526 // /* HeapReference<Class> */ temp = receiver->klass_ 2527 __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset)); 2528 } 2529 codegen_->MaybeRecordImplicitNullCheck(invoke); 2530 // Instead of simply (possibly) unpoisoning `temp` here, we should 2531 // emit a read barrier for the previous class reference load. 2532 // However this is not required in practice, as this is an 2533 // intermediate/temporary reference and because the current 2534 // concurrent copying collector keeps the from-space memory 2535 // intact/accessible until the end of the marking phase (the 2536 // concurrent copying collector may not in the future). 2537 __ MaybeUnpoisonHeapReference(temp); 2538 // temp = temp->GetAddressOfIMT() 2539 __ movq(temp, 2540 Address(temp, mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value())); 2541 // temp = temp->GetImtEntryAt(method_offset); 2542 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement( 2543 invoke->GetImtIndex(), kX86_64PointerSize)); 2544 // temp = temp->GetImtEntryAt(method_offset); 2545 __ movq(temp, Address(temp, method_offset)); 2546 // call temp->GetEntryPoint(); 2547 __ call(Address( 2548 temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64PointerSize).SizeValue())); 2549 2550 DCHECK(!codegen_->IsLeafMethod()); 2551 codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); 2552 } 2553 2554 void LocationsBuilderX86_64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) { 2555 HandleInvoke(invoke); 2556 } 2557 2558 void InstructionCodeGeneratorX86_64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) { 2559 codegen_->GenerateInvokePolymorphicCall(invoke); 2560 } 2561 2562 void LocationsBuilderX86_64::VisitInvokeCustom(HInvokeCustom* invoke) { 2563 HandleInvoke(invoke); 2564 } 2565 2566 void InstructionCodeGeneratorX86_64::VisitInvokeCustom(HInvokeCustom* invoke) { 2567 codegen_->GenerateInvokeCustomCall(invoke); 2568 } 2569 2570 void LocationsBuilderX86_64::VisitNeg(HNeg* neg) { 2571 LocationSummary* locations = 2572 new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall); 2573 switch (neg->GetResultType()) { 2574 case DataType::Type::kInt32: 2575 case DataType::Type::kInt64: 2576 locations->SetInAt(0, Location::RequiresRegister()); 2577 locations->SetOut(Location::SameAsFirstInput()); 2578 break; 2579 2580 case DataType::Type::kFloat32: 2581 case DataType::Type::kFloat64: 2582 locations->SetInAt(0, Location::RequiresFpuRegister()); 2583 locations->SetOut(Location::SameAsFirstInput()); 2584 locations->AddTemp(Location::RequiresFpuRegister()); 2585 break; 2586 2587 default: 2588 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType(); 2589 } 2590 } 2591 2592 void InstructionCodeGeneratorX86_64::VisitNeg(HNeg* neg) { 2593 LocationSummary* locations = neg->GetLocations(); 2594 Location out = locations->Out(); 2595 Location in = locations->InAt(0); 2596 switch (neg->GetResultType()) { 2597 case DataType::Type::kInt32: 2598 DCHECK(in.IsRegister()); 2599 DCHECK(in.Equals(out)); 2600 __ negl(out.AsRegister<CpuRegister>()); 2601 break; 2602 2603 case DataType::Type::kInt64: 2604 DCHECK(in.IsRegister()); 2605 DCHECK(in.Equals(out)); 2606 __ 
negq(out.AsRegister<CpuRegister>()); 2607 break; 2608 2609 case DataType::Type::kFloat32: { 2610 DCHECK(in.Equals(out)); 2611 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); 2612 // Implement float negation with an exclusive or with value 2613 // 0x80000000 (mask for bit 31, representing the sign of a 2614 // single-precision floating-point number). 2615 __ movss(mask, codegen_->LiteralInt32Address(0x80000000)); 2616 __ xorps(out.AsFpuRegister<XmmRegister>(), mask); 2617 break; 2618 } 2619 2620 case DataType::Type::kFloat64: { 2621 DCHECK(in.Equals(out)); 2622 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); 2623 // Implement double negation with an exclusive or with value 2624 // 0x8000000000000000 (mask for bit 63, representing the sign of 2625 // a double-precision floating-point number). 2626 __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000))); 2627 __ xorpd(out.AsFpuRegister<XmmRegister>(), mask); 2628 break; 2629 } 2630 2631 default: 2632 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType(); 2633 } 2634 } 2635 2636 void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) { 2637 LocationSummary* locations = 2638 new (GetGraph()->GetAllocator()) LocationSummary(conversion, LocationSummary::kNoCall); 2639 DataType::Type result_type = conversion->GetResultType(); 2640 DataType::Type input_type = conversion->GetInputType(); 2641 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type)) 2642 << input_type << " -> " << result_type; 2643 2644 switch (result_type) { 2645 case DataType::Type::kUint8: 2646 case DataType::Type::kInt8: 2647 case DataType::Type::kUint16: 2648 case DataType::Type::kInt16: 2649 DCHECK(DataType::IsIntegralType(input_type)) << input_type; 2650 locations->SetInAt(0, Location::Any()); 2651 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 2652 break; 2653 2654 case DataType::Type::kInt32: 2655 switch (input_type) { 2656 case DataType::Type::kInt64: 2657 locations->SetInAt(0, Location::Any()); 2658 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 2659 break; 2660 2661 case DataType::Type::kFloat32: 2662 locations->SetInAt(0, Location::RequiresFpuRegister()); 2663 locations->SetOut(Location::RequiresRegister()); 2664 break; 2665 2666 case DataType::Type::kFloat64: 2667 locations->SetInAt(0, Location::RequiresFpuRegister()); 2668 locations->SetOut(Location::RequiresRegister()); 2669 break; 2670 2671 default: 2672 LOG(FATAL) << "Unexpected type conversion from " << input_type 2673 << " to " << result_type; 2674 } 2675 break; 2676 2677 case DataType::Type::kInt64: 2678 switch (input_type) { 2679 case DataType::Type::kBool: 2680 case DataType::Type::kUint8: 2681 case DataType::Type::kInt8: 2682 case DataType::Type::kUint16: 2683 case DataType::Type::kInt16: 2684 case DataType::Type::kInt32: 2685 // TODO: We would benefit from a (to-be-implemented) 2686 // Location::RegisterOrStackSlot requirement for this input. 
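// (Illustrative note: the int-to-long widening emitted for this case is a single
// sign extension, e.g. `movsxd out, in`, and movsxd also accepts a memory operand,
// which is what the RegisterOrStackSlot idea above would exploit.)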
2687 locations->SetInAt(0, Location::RequiresRegister()); 2688 locations->SetOut(Location::RequiresRegister()); 2689 break; 2690 2691 case DataType::Type::kFloat32: 2692 locations->SetInAt(0, Location::RequiresFpuRegister()); 2693 locations->SetOut(Location::RequiresRegister()); 2694 break; 2695 2696 case DataType::Type::kFloat64: 2697 locations->SetInAt(0, Location::RequiresFpuRegister()); 2698 locations->SetOut(Location::RequiresRegister()); 2699 break; 2700 2701 default: 2702 LOG(FATAL) << "Unexpected type conversion from " << input_type 2703 << " to " << result_type; 2704 } 2705 break; 2706 2707 case DataType::Type::kFloat32: 2708 switch (input_type) { 2709 case DataType::Type::kBool: 2710 case DataType::Type::kUint8: 2711 case DataType::Type::kInt8: 2712 case DataType::Type::kUint16: 2713 case DataType::Type::kInt16: 2714 case DataType::Type::kInt32: 2715 locations->SetInAt(0, Location::Any()); 2716 locations->SetOut(Location::RequiresFpuRegister()); 2717 break; 2718 2719 case DataType::Type::kInt64: 2720 locations->SetInAt(0, Location::Any()); 2721 locations->SetOut(Location::RequiresFpuRegister()); 2722 break; 2723 2724 case DataType::Type::kFloat64: 2725 locations->SetInAt(0, Location::Any()); 2726 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 2727 break; 2728 2729 default: 2730 LOG(FATAL) << "Unexpected type conversion from " << input_type 2731 << " to " << result_type; 2732 } 2733 break; 2734 2735 case DataType::Type::kFloat64: 2736 switch (input_type) { 2737 case DataType::Type::kBool: 2738 case DataType::Type::kUint8: 2739 case DataType::Type::kInt8: 2740 case DataType::Type::kUint16: 2741 case DataType::Type::kInt16: 2742 case DataType::Type::kInt32: 2743 locations->SetInAt(0, Location::Any()); 2744 locations->SetOut(Location::RequiresFpuRegister()); 2745 break; 2746 2747 case DataType::Type::kInt64: 2748 locations->SetInAt(0, Location::Any()); 2749 locations->SetOut(Location::RequiresFpuRegister()); 2750 break; 2751 2752 case DataType::Type::kFloat32: 2753 locations->SetInAt(0, Location::Any()); 2754 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 2755 break; 2756 2757 default: 2758 LOG(FATAL) << "Unexpected type conversion from " << input_type 2759 << " to " << result_type; 2760 } 2761 break; 2762 2763 default: 2764 LOG(FATAL) << "Unexpected type conversion from " << input_type 2765 << " to " << result_type; 2766 } 2767 } 2768 2769 void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conversion) { 2770 LocationSummary* locations = conversion->GetLocations(); 2771 Location out = locations->Out(); 2772 Location in = locations->InAt(0); 2773 DataType::Type result_type = conversion->GetResultType(); 2774 DataType::Type input_type = conversion->GetInputType(); 2775 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type)) 2776 << input_type << " -> " << result_type; 2777 switch (result_type) { 2778 case DataType::Type::kUint8: 2779 switch (input_type) { 2780 case DataType::Type::kInt8: 2781 case DataType::Type::kUint16: 2782 case DataType::Type::kInt16: 2783 case DataType::Type::kInt32: 2784 case DataType::Type::kInt64: 2785 if (in.IsRegister()) { 2786 __ movzxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>()); 2787 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) { 2788 __ movzxb(out.AsRegister<CpuRegister>(), 2789 Address(CpuRegister(RSP), in.GetStackIndex())); 2790 } else { 2791 __ movl(out.AsRegister<CpuRegister>(), 2792 
Immediate(static_cast<uint8_t>(Int64FromConstant(in.GetConstant())))); 2793 } 2794 break; 2795 2796 default: 2797 LOG(FATAL) << "Unexpected type conversion from " << input_type 2798 << " to " << result_type; 2799 } 2800 break; 2801 2802 case DataType::Type::kInt8: 2803 switch (input_type) { 2804 case DataType::Type::kUint8: 2805 case DataType::Type::kUint16: 2806 case DataType::Type::kInt16: 2807 case DataType::Type::kInt32: 2808 case DataType::Type::kInt64: 2809 if (in.IsRegister()) { 2810 __ movsxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>()); 2811 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) { 2812 __ movsxb(out.AsRegister<CpuRegister>(), 2813 Address(CpuRegister(RSP), in.GetStackIndex())); 2814 } else { 2815 __ movl(out.AsRegister<CpuRegister>(), 2816 Immediate(static_cast<int8_t>(Int64FromConstant(in.GetConstant())))); 2817 } 2818 break; 2819 2820 default: 2821 LOG(FATAL) << "Unexpected type conversion from " << input_type 2822 << " to " << result_type; 2823 } 2824 break; 2825 2826 case DataType::Type::kUint16: 2827 switch (input_type) { 2828 case DataType::Type::kInt8: 2829 case DataType::Type::kInt16: 2830 case DataType::Type::kInt32: 2831 case DataType::Type::kInt64: 2832 if (in.IsRegister()) { 2833 __ movzxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>()); 2834 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) { 2835 __ movzxw(out.AsRegister<CpuRegister>(), 2836 Address(CpuRegister(RSP), in.GetStackIndex())); 2837 } else { 2838 __ movl(out.AsRegister<CpuRegister>(), 2839 Immediate(static_cast<uint16_t>(Int64FromConstant(in.GetConstant())))); 2840 } 2841 break; 2842 2843 default: 2844 LOG(FATAL) << "Unexpected type conversion from " << input_type 2845 << " to " << result_type; 2846 } 2847 break; 2848 2849 case DataType::Type::kInt16: 2850 switch (input_type) { 2851 case DataType::Type::kUint16: 2852 case DataType::Type::kInt32: 2853 case DataType::Type::kInt64: 2854 if (in.IsRegister()) { 2855 __ movsxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>()); 2856 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) { 2857 __ movsxw(out.AsRegister<CpuRegister>(), 2858 Address(CpuRegister(RSP), in.GetStackIndex())); 2859 } else { 2860 __ movl(out.AsRegister<CpuRegister>(), 2861 Immediate(static_cast<int16_t>(Int64FromConstant(in.GetConstant())))); 2862 } 2863 break; 2864 2865 default: 2866 LOG(FATAL) << "Unexpected type conversion from " << input_type 2867 << " to " << result_type; 2868 } 2869 break; 2870 2871 case DataType::Type::kInt32: 2872 switch (input_type) { 2873 case DataType::Type::kInt64: 2874 if (in.IsRegister()) { 2875 __ movl(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>()); 2876 } else if (in.IsDoubleStackSlot()) { 2877 __ movl(out.AsRegister<CpuRegister>(), 2878 Address(CpuRegister(RSP), in.GetStackIndex())); 2879 } else { 2880 DCHECK(in.IsConstant()); 2881 DCHECK(in.GetConstant()->IsLongConstant()); 2882 int64_t value = in.GetConstant()->AsLongConstant()->GetValue(); 2883 __ movl(out.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value))); 2884 } 2885 break; 2886 2887 case DataType::Type::kFloat32: { 2888 XmmRegister input = in.AsFpuRegister<XmmRegister>(); 2889 CpuRegister output = out.AsRegister<CpuRegister>(); 2890 NearLabel done, nan; 2891 2892 __ movl(output, Immediate(kPrimIntMax)); 2893 // if input >= (float)INT_MAX goto done 2894 __ comiss(input, codegen_->LiteralFloatAddress(kPrimIntMax)); 2895 __ j(kAboveEqual, &done); 2896 // if input == NaN goto nan 2897 __ j(kUnordered, &nan); 2898 
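// (Illustrative note: at this point any input >= INT_MAX, including +inf, has kept
// the kPrimIntMax loaded above and branched to `done`, and NaN has been routed to
// the zeroing path at `nan`; the truncating conversion below therefore only sees
// in-range and negative values, with cvttss2si itself saturating negative overflow
// to INT_MIN.)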
// output = float-to-int-truncate(input) 2899 __ cvttss2si(output, input, false); 2900 __ jmp(&done); 2901 __ Bind(&nan); 2902 // output = 0 2903 __ xorl(output, output); 2904 __ Bind(&done); 2905 break; 2906 } 2907 2908 case DataType::Type::kFloat64: { 2909 XmmRegister input = in.AsFpuRegister<XmmRegister>(); 2910 CpuRegister output = out.AsRegister<CpuRegister>(); 2911 NearLabel done, nan; 2912 2913 __ movl(output, Immediate(kPrimIntMax)); 2914 // if input >= (double)INT_MAX goto done 2915 __ comisd(input, codegen_->LiteralDoubleAddress(kPrimIntMax)); 2916 __ j(kAboveEqual, &done); 2917 // if input == NaN goto nan 2918 __ j(kUnordered, &nan); 2919 // output = double-to-int-truncate(input) 2920 __ cvttsd2si(output, input); 2921 __ jmp(&done); 2922 __ Bind(&nan); 2923 // output = 0 2924 __ xorl(output, output); 2925 __ Bind(&done); 2926 break; 2927 } 2928 2929 default: 2930 LOG(FATAL) << "Unexpected type conversion from " << input_type 2931 << " to " << result_type; 2932 } 2933 break; 2934 2935 case DataType::Type::kInt64: 2936 switch (input_type) { 2937 DCHECK(out.IsRegister()); 2938 case DataType::Type::kBool: 2939 case DataType::Type::kUint8: 2940 case DataType::Type::kInt8: 2941 case DataType::Type::kUint16: 2942 case DataType::Type::kInt16: 2943 case DataType::Type::kInt32: 2944 DCHECK(in.IsRegister()); 2945 __ movsxd(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>()); 2946 break; 2947 2948 case DataType::Type::kFloat32: { 2949 XmmRegister input = in.AsFpuRegister<XmmRegister>(); 2950 CpuRegister output = out.AsRegister<CpuRegister>(); 2951 NearLabel done, nan; 2952 2953 codegen_->Load64BitValue(output, kPrimLongMax); 2954 // if input >= (float)LONG_MAX goto done 2955 __ comiss(input, codegen_->LiteralFloatAddress(kPrimLongMax)); 2956 __ j(kAboveEqual, &done); 2957 // if input == NaN goto nan 2958 __ j(kUnordered, &nan); 2959 // output = float-to-long-truncate(input) 2960 __ cvttss2si(output, input, true); 2961 __ jmp(&done); 2962 __ Bind(&nan); 2963 // output = 0 2964 __ xorl(output, output); 2965 __ Bind(&done); 2966 break; 2967 } 2968 2969 case DataType::Type::kFloat64: { 2970 XmmRegister input = in.AsFpuRegister<XmmRegister>(); 2971 CpuRegister output = out.AsRegister<CpuRegister>(); 2972 NearLabel done, nan; 2973 2974 codegen_->Load64BitValue(output, kPrimLongMax); 2975 // if input >= (double)LONG_MAX goto done 2976 __ comisd(input, codegen_->LiteralDoubleAddress(kPrimLongMax)); 2977 __ j(kAboveEqual, &done); 2978 // if input == NaN goto nan 2979 __ j(kUnordered, &nan); 2980 // output = double-to-long-truncate(input) 2981 __ cvttsd2si(output, input, true); 2982 __ jmp(&done); 2983 __ Bind(&nan); 2984 // output = 0 2985 __ xorl(output, output); 2986 __ Bind(&done); 2987 break; 2988 } 2989 2990 default: 2991 LOG(FATAL) << "Unexpected type conversion from " << input_type 2992 << " to " << result_type; 2993 } 2994 break; 2995 2996 case DataType::Type::kFloat32: 2997 switch (input_type) { 2998 case DataType::Type::kBool: 2999 case DataType::Type::kUint8: 3000 case DataType::Type::kInt8: 3001 case DataType::Type::kUint16: 3002 case DataType::Type::kInt16: 3003 case DataType::Type::kInt32: 3004 if (in.IsRegister()) { 3005 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false); 3006 } else if (in.IsConstant()) { 3007 int32_t v = in.GetConstant()->AsIntConstant()->GetValue(); 3008 XmmRegister dest = out.AsFpuRegister<XmmRegister>(); 3009 codegen_->Load32BitValue(dest, static_cast<float>(v)); 3010 } else { 3011 __ 
cvtsi2ss(out.AsFpuRegister<XmmRegister>(), 3012 Address(CpuRegister(RSP), in.GetStackIndex()), false); 3013 } 3014 break; 3015 3016 case DataType::Type::kInt64: 3017 if (in.IsRegister()) { 3018 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true); 3019 } else if (in.IsConstant()) { 3020 int64_t v = in.GetConstant()->AsLongConstant()->GetValue(); 3021 XmmRegister dest = out.AsFpuRegister<XmmRegister>(); 3022 codegen_->Load32BitValue(dest, static_cast<float>(v)); 3023 } else { 3024 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), 3025 Address(CpuRegister(RSP), in.GetStackIndex()), true); 3026 } 3027 break; 3028 3029 case DataType::Type::kFloat64: 3030 if (in.IsFpuRegister()) { 3031 __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>()); 3032 } else if (in.IsConstant()) { 3033 double v = in.GetConstant()->AsDoubleConstant()->GetValue(); 3034 XmmRegister dest = out.AsFpuRegister<XmmRegister>(); 3035 codegen_->Load32BitValue(dest, static_cast<float>(v)); 3036 } else { 3037 __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), 3038 Address(CpuRegister(RSP), in.GetStackIndex())); 3039 } 3040 break; 3041 3042 default: 3043 LOG(FATAL) << "Unexpected type conversion from " << input_type 3044 << " to " << result_type; 3045 } 3046 break; 3047 3048 case DataType::Type::kFloat64: 3049 switch (input_type) { 3050 case DataType::Type::kBool: 3051 case DataType::Type::kUint8: 3052 case DataType::Type::kInt8: 3053 case DataType::Type::kUint16: 3054 case DataType::Type::kInt16: 3055 case DataType::Type::kInt32: 3056 if (in.IsRegister()) { 3057 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false); 3058 } else if (in.IsConstant()) { 3059 int32_t v = in.GetConstant()->AsIntConstant()->GetValue(); 3060 XmmRegister dest = out.AsFpuRegister<XmmRegister>(); 3061 codegen_->Load64BitValue(dest, static_cast<double>(v)); 3062 } else { 3063 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), 3064 Address(CpuRegister(RSP), in.GetStackIndex()), false); 3065 } 3066 break; 3067 3068 case DataType::Type::kInt64: 3069 if (in.IsRegister()) { 3070 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true); 3071 } else if (in.IsConstant()) { 3072 int64_t v = in.GetConstant()->AsLongConstant()->GetValue(); 3073 XmmRegister dest = out.AsFpuRegister<XmmRegister>(); 3074 codegen_->Load64BitValue(dest, static_cast<double>(v)); 3075 } else { 3076 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), 3077 Address(CpuRegister(RSP), in.GetStackIndex()), true); 3078 } 3079 break; 3080 3081 case DataType::Type::kFloat32: 3082 if (in.IsFpuRegister()) { 3083 __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>()); 3084 } else if (in.IsConstant()) { 3085 float v = in.GetConstant()->AsFloatConstant()->GetValue(); 3086 XmmRegister dest = out.AsFpuRegister<XmmRegister>(); 3087 codegen_->Load64BitValue(dest, static_cast<double>(v)); 3088 } else { 3089 __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), 3090 Address(CpuRegister(RSP), in.GetStackIndex())); 3091 } 3092 break; 3093 3094 default: 3095 LOG(FATAL) << "Unexpected type conversion from " << input_type 3096 << " to " << result_type; 3097 } 3098 break; 3099 3100 default: 3101 LOG(FATAL) << "Unexpected type conversion from " << input_type 3102 << " to " << result_type; 3103 } 3104 } 3105 3106 void LocationsBuilderX86_64::VisitAdd(HAdd* add) { 3107 LocationSummary* locations = 3108 new (GetGraph()->GetAllocator()) LocationSummary(add, LocationSummary::kNoCall); 3109 switch (add->GetResultType()) { 
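    // Note on the choices below: the integer adds can allow the output to differ from
    // the inputs because leal/leaq provides a non-destructive three-operand add, while
    // the FP cases use the two-operand addss/addsd and therefore require the output to
    // be the same as the first input.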
3110 case DataType::Type::kInt32: { 3111 locations->SetInAt(0, Location::RequiresRegister()); 3112 locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1))); 3113 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 3114 break; 3115 } 3116 3117 case DataType::Type::kInt64: { 3118 locations->SetInAt(0, Location::RequiresRegister()); 3119 // We can use a leaq or addq if the constant can fit in an immediate. 3120 locations->SetInAt(1, Location::RegisterOrInt32Constant(add->InputAt(1))); 3121 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 3122 break; 3123 } 3124 3125 case DataType::Type::kFloat64: 3126 case DataType::Type::kFloat32: { 3127 locations->SetInAt(0, Location::RequiresFpuRegister()); 3128 locations->SetInAt(1, Location::Any()); 3129 locations->SetOut(Location::SameAsFirstInput()); 3130 break; 3131 } 3132 3133 default: 3134 LOG(FATAL) << "Unexpected add type " << add->GetResultType(); 3135 } 3136 } 3137 3138 void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) { 3139 LocationSummary* locations = add->GetLocations(); 3140 Location first = locations->InAt(0); 3141 Location second = locations->InAt(1); 3142 Location out = locations->Out(); 3143 3144 switch (add->GetResultType()) { 3145 case DataType::Type::kInt32: { 3146 if (second.IsRegister()) { 3147 if (out.AsRegister<Register>() == first.AsRegister<Register>()) { 3148 __ addl(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); 3149 } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) { 3150 __ addl(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>()); 3151 } else { 3152 __ leal(out.AsRegister<CpuRegister>(), Address( 3153 first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0)); 3154 } 3155 } else if (second.IsConstant()) { 3156 if (out.AsRegister<Register>() == first.AsRegister<Register>()) { 3157 __ addl(out.AsRegister<CpuRegister>(), 3158 Immediate(second.GetConstant()->AsIntConstant()->GetValue())); 3159 } else { 3160 __ leal(out.AsRegister<CpuRegister>(), Address( 3161 first.AsRegister<CpuRegister>(), second.GetConstant()->AsIntConstant()->GetValue())); 3162 } 3163 } else { 3164 DCHECK(first.Equals(locations->Out())); 3165 __ addl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex())); 3166 } 3167 break; 3168 } 3169 3170 case DataType::Type::kInt64: { 3171 if (second.IsRegister()) { 3172 if (out.AsRegister<Register>() == first.AsRegister<Register>()) { 3173 __ addq(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); 3174 } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) { 3175 __ addq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>()); 3176 } else { 3177 __ leaq(out.AsRegister<CpuRegister>(), Address( 3178 first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0)); 3179 } 3180 } else { 3181 DCHECK(second.IsConstant()); 3182 int64_t value = second.GetConstant()->AsLongConstant()->GetValue(); 3183 int32_t int32_value = Low32Bits(value); 3184 DCHECK_EQ(int32_value, value); 3185 if (out.AsRegister<Register>() == first.AsRegister<Register>()) { 3186 __ addq(out.AsRegister<CpuRegister>(), Immediate(int32_value)); 3187 } else { 3188 __ leaq(out.AsRegister<CpuRegister>(), Address( 3189 first.AsRegister<CpuRegister>(), int32_value)); 3190 } 3191 } 3192 break; 3193 } 3194 3195 case DataType::Type::kFloat32: { 3196 if (second.IsFpuRegister()) { 3197 __ addss(first.AsFpuRegister<XmmRegister>(), 
second.AsFpuRegister<XmmRegister>()); 3198 } else if (second.IsConstant()) { 3199 __ addss(first.AsFpuRegister<XmmRegister>(), 3200 codegen_->LiteralFloatAddress( 3201 second.GetConstant()->AsFloatConstant()->GetValue())); 3202 } else { 3203 DCHECK(second.IsStackSlot()); 3204 __ addss(first.AsFpuRegister<XmmRegister>(), 3205 Address(CpuRegister(RSP), second.GetStackIndex())); 3206 } 3207 break; 3208 } 3209 3210 case DataType::Type::kFloat64: { 3211 if (second.IsFpuRegister()) { 3212 __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); 3213 } else if (second.IsConstant()) { 3214 __ addsd(first.AsFpuRegister<XmmRegister>(), 3215 codegen_->LiteralDoubleAddress( 3216 second.GetConstant()->AsDoubleConstant()->GetValue())); 3217 } else { 3218 DCHECK(second.IsDoubleStackSlot()); 3219 __ addsd(first.AsFpuRegister<XmmRegister>(), 3220 Address(CpuRegister(RSP), second.GetStackIndex())); 3221 } 3222 break; 3223 } 3224 3225 default: 3226 LOG(FATAL) << "Unexpected add type " << add->GetResultType(); 3227 } 3228 } 3229 3230 void LocationsBuilderX86_64::VisitSub(HSub* sub) { 3231 LocationSummary* locations = 3232 new (GetGraph()->GetAllocator()) LocationSummary(sub, LocationSummary::kNoCall); 3233 switch (sub->GetResultType()) { 3234 case DataType::Type::kInt32: { 3235 locations->SetInAt(0, Location::RequiresRegister()); 3236 locations->SetInAt(1, Location::Any()); 3237 locations->SetOut(Location::SameAsFirstInput()); 3238 break; 3239 } 3240 case DataType::Type::kInt64: { 3241 locations->SetInAt(0, Location::RequiresRegister()); 3242 locations->SetInAt(1, Location::RegisterOrInt32Constant(sub->InputAt(1))); 3243 locations->SetOut(Location::SameAsFirstInput()); 3244 break; 3245 } 3246 case DataType::Type::kFloat32: 3247 case DataType::Type::kFloat64: { 3248 locations->SetInAt(0, Location::RequiresFpuRegister()); 3249 locations->SetInAt(1, Location::Any()); 3250 locations->SetOut(Location::SameAsFirstInput()); 3251 break; 3252 } 3253 default: 3254 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType(); 3255 } 3256 } 3257 3258 void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) { 3259 LocationSummary* locations = sub->GetLocations(); 3260 Location first = locations->InAt(0); 3261 Location second = locations->InAt(1); 3262 DCHECK(first.Equals(locations->Out())); 3263 switch (sub->GetResultType()) { 3264 case DataType::Type::kInt32: { 3265 if (second.IsRegister()) { 3266 __ subl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); 3267 } else if (second.IsConstant()) { 3268 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue()); 3269 __ subl(first.AsRegister<CpuRegister>(), imm); 3270 } else { 3271 __ subl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex())); 3272 } 3273 break; 3274 } 3275 case DataType::Type::kInt64: { 3276 if (second.IsConstant()) { 3277 int64_t value = second.GetConstant()->AsLongConstant()->GetValue(); 3278 DCHECK(IsInt<32>(value)); 3279 __ subq(first.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value))); 3280 } else { 3281 __ subq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); 3282 } 3283 break; 3284 } 3285 3286 case DataType::Type::kFloat32: { 3287 if (second.IsFpuRegister()) { 3288 __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); 3289 } else if (second.IsConstant()) { 3290 __ subss(first.AsFpuRegister<XmmRegister>(), 3291 codegen_->LiteralFloatAddress( 3292 second.GetConstant()->AsFloatConstant()->GetValue())); 3293 } else 
{ 3294 DCHECK(second.IsStackSlot()); 3295 __ subss(first.AsFpuRegister<XmmRegister>(), 3296 Address(CpuRegister(RSP), second.GetStackIndex())); 3297 } 3298 break; 3299 } 3300 3301 case DataType::Type::kFloat64: { 3302 if (second.IsFpuRegister()) { 3303 __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); 3304 } else if (second.IsConstant()) { 3305 __ subsd(first.AsFpuRegister<XmmRegister>(), 3306 codegen_->LiteralDoubleAddress( 3307 second.GetConstant()->AsDoubleConstant()->GetValue())); 3308 } else { 3309 DCHECK(second.IsDoubleStackSlot()); 3310 __ subsd(first.AsFpuRegister<XmmRegister>(), 3311 Address(CpuRegister(RSP), second.GetStackIndex())); 3312 } 3313 break; 3314 } 3315 3316 default: 3317 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType(); 3318 } 3319 } 3320 3321 void LocationsBuilderX86_64::VisitMul(HMul* mul) { 3322 LocationSummary* locations = 3323 new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall); 3324 switch (mul->GetResultType()) { 3325 case DataType::Type::kInt32: { 3326 locations->SetInAt(0, Location::RequiresRegister()); 3327 locations->SetInAt(1, Location::Any()); 3328 if (mul->InputAt(1)->IsIntConstant()) { 3329 // Can use 3 operand multiply. 3330 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 3331 } else { 3332 locations->SetOut(Location::SameAsFirstInput()); 3333 } 3334 break; 3335 } 3336 case DataType::Type::kInt64: { 3337 locations->SetInAt(0, Location::RequiresRegister()); 3338 locations->SetInAt(1, Location::Any()); 3339 if (mul->InputAt(1)->IsLongConstant() && 3340 IsInt<32>(mul->InputAt(1)->AsLongConstant()->GetValue())) { 3341 // Can use 3 operand multiply. 3342 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 3343 } else { 3344 locations->SetOut(Location::SameAsFirstInput()); 3345 } 3346 break; 3347 } 3348 case DataType::Type::kFloat32: 3349 case DataType::Type::kFloat64: { 3350 locations->SetInAt(0, Location::RequiresFpuRegister()); 3351 locations->SetInAt(1, Location::Any()); 3352 locations->SetOut(Location::SameAsFirstInput()); 3353 break; 3354 } 3355 3356 default: 3357 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType(); 3358 } 3359 } 3360 3361 void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) { 3362 LocationSummary* locations = mul->GetLocations(); 3363 Location first = locations->InAt(0); 3364 Location second = locations->InAt(1); 3365 Location out = locations->Out(); 3366 switch (mul->GetResultType()) { 3367 case DataType::Type::kInt32: 3368 // The constant may have ended up in a register, so test explicitly to avoid 3369 // problems where the output may not be the same as the first operand. 3370 if (mul->InputAt(1)->IsIntConstant()) { 3371 Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue()); 3372 __ imull(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), imm); 3373 } else if (second.IsRegister()) { 3374 DCHECK(first.Equals(out)); 3375 __ imull(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); 3376 } else { 3377 DCHECK(first.Equals(out)); 3378 DCHECK(second.IsStackSlot()); 3379 __ imull(first.AsRegister<CpuRegister>(), 3380 Address(CpuRegister(RSP), second.GetStackIndex())); 3381 } 3382 break; 3383 case DataType::Type::kInt64: { 3384 // The constant may have ended up in a register, so test explicitly to avoid 3385 // problems where the output may not be the same as the first operand. 
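      // Note: imulq only takes a sign-extended 32-bit immediate in its three-operand
      // form, so 64-bit constants that do not fit are multiplied from the constant
      // area below.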
3386 if (mul->InputAt(1)->IsLongConstant()) { 3387 int64_t value = mul->InputAt(1)->AsLongConstant()->GetValue(); 3388 if (IsInt<32>(value)) { 3389 __ imulq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), 3390 Immediate(static_cast<int32_t>(value))); 3391 } else { 3392 // Have to use the constant area. 3393 DCHECK(first.Equals(out)); 3394 __ imulq(first.AsRegister<CpuRegister>(), codegen_->LiteralInt64Address(value)); 3395 } 3396 } else if (second.IsRegister()) { 3397 DCHECK(first.Equals(out)); 3398 __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); 3399 } else { 3400 DCHECK(second.IsDoubleStackSlot()); 3401 DCHECK(first.Equals(out)); 3402 __ imulq(first.AsRegister<CpuRegister>(), 3403 Address(CpuRegister(RSP), second.GetStackIndex())); 3404 } 3405 break; 3406 } 3407 3408 case DataType::Type::kFloat32: { 3409 DCHECK(first.Equals(out)); 3410 if (second.IsFpuRegister()) { 3411 __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); 3412 } else if (second.IsConstant()) { 3413 __ mulss(first.AsFpuRegister<XmmRegister>(), 3414 codegen_->LiteralFloatAddress( 3415 second.GetConstant()->AsFloatConstant()->GetValue())); 3416 } else { 3417 DCHECK(second.IsStackSlot()); 3418 __ mulss(first.AsFpuRegister<XmmRegister>(), 3419 Address(CpuRegister(RSP), second.GetStackIndex())); 3420 } 3421 break; 3422 } 3423 3424 case DataType::Type::kFloat64: { 3425 DCHECK(first.Equals(out)); 3426 if (second.IsFpuRegister()) { 3427 __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); 3428 } else if (second.IsConstant()) { 3429 __ mulsd(first.AsFpuRegister<XmmRegister>(), 3430 codegen_->LiteralDoubleAddress( 3431 second.GetConstant()->AsDoubleConstant()->GetValue())); 3432 } else { 3433 DCHECK(second.IsDoubleStackSlot()); 3434 __ mulsd(first.AsFpuRegister<XmmRegister>(), 3435 Address(CpuRegister(RSP), second.GetStackIndex())); 3436 } 3437 break; 3438 } 3439 3440 default: 3441 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType(); 3442 } 3443 } 3444 3445 void InstructionCodeGeneratorX86_64::PushOntoFPStack(Location source, uint32_t temp_offset, 3446 uint32_t stack_adjustment, bool is_float) { 3447 if (source.IsStackSlot()) { 3448 DCHECK(is_float); 3449 __ flds(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment)); 3450 } else if (source.IsDoubleStackSlot()) { 3451 DCHECK(!is_float); 3452 __ fldl(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment)); 3453 } else { 3454 // Write the value to the temporary location on the stack and load to FP stack. 3455 if (is_float) { 3456 Location stack_temp = Location::StackSlot(temp_offset); 3457 codegen_->Move(stack_temp, source); 3458 __ flds(Address(CpuRegister(RSP), temp_offset)); 3459 } else { 3460 Location stack_temp = Location::DoubleStackSlot(temp_offset); 3461 codegen_->Move(stack_temp, source); 3462 __ fldl(Address(CpuRegister(RSP), temp_offset)); 3463 } 3464 } 3465 } 3466 3467 void InstructionCodeGeneratorX86_64::GenerateRemFP(HRem *rem) { 3468 DataType::Type type = rem->GetResultType(); 3469 bool is_float = type == DataType::Type::kFloat32; 3470 size_t elem_size = DataType::Size(type); 3471 LocationSummary* locations = rem->GetLocations(); 3472 Location first = locations->InAt(0); 3473 Location second = locations->InAt(1); 3474 Location out = locations->Out(); 3475 3476 // Create stack space for 2 elements. 3477 // TODO: enhance register allocator to ask for stack temporaries. 
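  // Note: Java's % on float/double is the remainder of a truncating division, which is
  // what x87 fprem computes (fprem1 would give the IEEE round-to-nearest remainder
  // instead). fprem only reduces the exponent difference by a bounded amount per
  // execution, so it is retried below until the C2 status flag reports that the
  // reduction is complete.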
3478 __ subq(CpuRegister(RSP), Immediate(2 * elem_size)); 3479 3480 // Load the values to the FP stack in reverse order, using temporaries if needed. 3481 PushOntoFPStack(second, elem_size, 2 * elem_size, is_float); 3482 PushOntoFPStack(first, 0, 2 * elem_size, is_float); 3483 3484 // Loop doing FPREM until we stabilize. 3485 NearLabel retry; 3486 __ Bind(&retry); 3487 __ fprem(); 3488 3489 // Move FP status to AX. 3490 __ fstsw(); 3491 3492 // And see if the argument reduction is complete. This is signaled by the 3493 // C2 FPU flag bit set to 0. 3494 __ andl(CpuRegister(RAX), Immediate(kC2ConditionMask)); 3495 __ j(kNotEqual, &retry); 3496 3497 // We have settled on the final value. Retrieve it into an XMM register. 3498 // Store FP top of stack to real stack. 3499 if (is_float) { 3500 __ fsts(Address(CpuRegister(RSP), 0)); 3501 } else { 3502 __ fstl(Address(CpuRegister(RSP), 0)); 3503 } 3504 3505 // Pop the 2 items from the FP stack. 3506 __ fucompp(); 3507 3508 // Load the value from the stack into an XMM register. 3509 DCHECK(out.IsFpuRegister()) << out; 3510 if (is_float) { 3511 __ movss(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0)); 3512 } else { 3513 __ movsd(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0)); 3514 } 3515 3516 // And remove the temporary stack space we allocated. 3517 __ addq(CpuRegister(RSP), Immediate(2 * elem_size)); 3518 } 3519 3520 void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instruction) { 3521 DCHECK(instruction->IsDiv() || instruction->IsRem()); 3522 3523 LocationSummary* locations = instruction->GetLocations(); 3524 Location second = locations->InAt(1); 3525 DCHECK(second.IsConstant()); 3526 3527 CpuRegister output_register = locations->Out().AsRegister<CpuRegister>(); 3528 CpuRegister input_register = locations->InAt(0).AsRegister<CpuRegister>(); 3529 int64_t imm = Int64FromConstant(second.GetConstant()); 3530 3531 DCHECK(imm == 1 || imm == -1); 3532 3533 switch (instruction->GetResultType()) { 3534 case DataType::Type::kInt32: { 3535 if (instruction->IsRem()) { 3536 __ xorl(output_register, output_register); 3537 } else { 3538 __ movl(output_register, input_register); 3539 if (imm == -1) { 3540 __ negl(output_register); 3541 } 3542 } 3543 break; 3544 } 3545 3546 case DataType::Type::kInt64: { 3547 if (instruction->IsRem()) { 3548 __ xorl(output_register, output_register); 3549 } else { 3550 __ movq(output_register, input_register); 3551 if (imm == -1) { 3552 __ negq(output_register); 3553 } 3554 } 3555 break; 3556 } 3557 3558 default: 3559 LOG(FATAL) << "Unexpected type for div by (-)1 " << instruction->GetResultType(); 3560 } 3561 } 3562 void InstructionCodeGeneratorX86_64::RemByPowerOfTwo(HRem* instruction) { 3563 LocationSummary* locations = instruction->GetLocations(); 3564 Location second = locations->InAt(1); 3565 CpuRegister out = locations->Out().AsRegister<CpuRegister>(); 3566 CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>(); 3567 int64_t imm = Int64FromConstant(second.GetConstant()); 3568 DCHECK(IsPowerOfTwo(AbsOrMin(imm))); 3569 uint64_t abs_imm = AbsOrMin(imm); 3570 CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>(); 3571 if (instruction->GetResultType() == DataType::Type::kInt32) { 3572 NearLabel done; 3573 __ movl(out, numerator); 3574 __ andl(out, Immediate(abs_imm-1)); 3575 __ j(Condition::kZero, &done); 3576 __ leal(tmp, Address(out, static_cast<int32_t>(~(abs_imm-1)))); 3577 __ testl(numerator, numerator); 3578 __ cmov(Condition::kLess, out, 
tmp, false); 3579 __ Bind(&done); 3580 3581 } else { 3582 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64); 3583 codegen_->Load64BitValue(tmp, abs_imm - 1); 3584 NearLabel done; 3585 3586 __ movq(out, numerator); 3587 __ andq(out, tmp); 3588 __ j(Condition::kZero, &done); 3589 __ movq(tmp, numerator); 3590 __ sarq(tmp, Immediate(63)); 3591 __ shlq(tmp, Immediate(WhichPowerOf2(abs_imm))); 3592 __ orq(out, tmp); 3593 __ Bind(&done); 3594 } 3595 } 3596 void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) { 3597 LocationSummary* locations = instruction->GetLocations(); 3598 Location second = locations->InAt(1); 3599 3600 CpuRegister output_register = locations->Out().AsRegister<CpuRegister>(); 3601 CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>(); 3602 3603 int64_t imm = Int64FromConstant(second.GetConstant()); 3604 DCHECK(IsPowerOfTwo(AbsOrMin(imm))); 3605 uint64_t abs_imm = AbsOrMin(imm); 3606 3607 CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>(); 3608 3609 if (instruction->GetResultType() == DataType::Type::kInt32) { 3610 // When denominator is equal to 2, we can add signed bit and numerator to tmp. 3611 // Below we are using addl instruction instead of cmov which give us 1 cycle benefit. 3612 if (abs_imm == 2) { 3613 __ leal(tmp, Address(numerator, 0)); 3614 __ shrl(tmp, Immediate(31)); 3615 __ addl(tmp, numerator); 3616 } else { 3617 __ leal(tmp, Address(numerator, abs_imm - 1)); 3618 __ testl(numerator, numerator); 3619 __ cmov(kGreaterEqual, tmp, numerator); 3620 } 3621 int shift = CTZ(imm); 3622 __ sarl(tmp, Immediate(shift)); 3623 3624 if (imm < 0) { 3625 __ negl(tmp); 3626 } 3627 3628 __ movl(output_register, tmp); 3629 } else { 3630 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64); 3631 CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>(); 3632 if (abs_imm == 2) { 3633 __ movq(rdx, numerator); 3634 __ shrq(rdx, Immediate(63)); 3635 __ addq(rdx, numerator); 3636 } else { 3637 codegen_->Load64BitValue(rdx, abs_imm - 1); 3638 __ addq(rdx, numerator); 3639 __ testq(numerator, numerator); 3640 __ cmov(kGreaterEqual, rdx, numerator); 3641 } 3642 int shift = CTZ(imm); 3643 __ sarq(rdx, Immediate(shift)); 3644 3645 if (imm < 0) { 3646 __ negq(rdx); 3647 } 3648 3649 __ movq(output_register, rdx); 3650 } 3651 } 3652 3653 void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) { 3654 DCHECK(instruction->IsDiv() || instruction->IsRem()); 3655 3656 LocationSummary* locations = instruction->GetLocations(); 3657 Location second = locations->InAt(1); 3658 3659 CpuRegister numerator = instruction->IsDiv() ? locations->GetTemp(1).AsRegister<CpuRegister>() 3660 : locations->GetTemp(0).AsRegister<CpuRegister>(); 3661 CpuRegister eax = locations->InAt(0).AsRegister<CpuRegister>(); 3662 CpuRegister edx = instruction->IsDiv() ? locations->GetTemp(0).AsRegister<CpuRegister>() 3663 : locations->Out().AsRegister<CpuRegister>(); 3664 CpuRegister out = locations->Out().AsRegister<CpuRegister>(); 3665 3666 DCHECK_EQ(RAX, eax.AsRegister()); 3667 DCHECK_EQ(RDX, edx.AsRegister()); 3668 if (instruction->IsDiv()) { 3669 DCHECK_EQ(RAX, out.AsRegister()); 3670 } else { 3671 DCHECK_EQ(RDX, out.AsRegister()); 3672 } 3673 3674 int64_t magic; 3675 int shift; 3676 3677 // TODO: can these branches be written as one? 
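  // Rough sketch of the 32-bit division path below, using the classic constants for a
  // divisor of 7 (magic = 0x92492493, shift = 2) as an example of what
  // CalculateMagicAndShiftForDivRem produces:
  //   t  = high 32 bits of (magic * n)            // signed 64-bit product
  //   t += n                                      // since imm > 0 and magic < 0
  //   t >>= 2                                     // arithmetic shift by 'shift'
  //   q  = t + (t logically shifted right by 31)  // add 1 if t is negative
  // e.g. n = -7: t = 2, then -5, then -2, and q = -2 + 1 = -1, which is -7 / 7.
  // The remainder path then computes n - q * imm.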
3678 if (instruction->GetResultType() == DataType::Type::kInt32) { 3679 int imm = second.GetConstant()->AsIntConstant()->GetValue(); 3680 3681 CalculateMagicAndShiftForDivRem(imm, false /* is_long= */, &magic, &shift); 3682 3683 __ movl(numerator, eax); 3684 3685 __ movl(eax, Immediate(magic)); 3686 __ imull(numerator); 3687 3688 if (imm > 0 && magic < 0) { 3689 __ addl(edx, numerator); 3690 } else if (imm < 0 && magic > 0) { 3691 __ subl(edx, numerator); 3692 } 3693 3694 if (shift != 0) { 3695 __ sarl(edx, Immediate(shift)); 3696 } 3697 3698 __ movl(eax, edx); 3699 __ shrl(edx, Immediate(31)); 3700 __ addl(edx, eax); 3701 3702 if (instruction->IsRem()) { 3703 __ movl(eax, numerator); 3704 __ imull(edx, Immediate(imm)); 3705 __ subl(eax, edx); 3706 __ movl(edx, eax); 3707 } else { 3708 __ movl(eax, edx); 3709 } 3710 } else { 3711 int64_t imm = second.GetConstant()->AsLongConstant()->GetValue(); 3712 3713 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64); 3714 3715 CpuRegister rax = eax; 3716 CpuRegister rdx = edx; 3717 3718 CalculateMagicAndShiftForDivRem(imm, true /* is_long= */, &magic, &shift); 3719 3720 // Save the numerator. 3721 __ movq(numerator, rax); 3722 3723 // RAX = magic 3724 codegen_->Load64BitValue(rax, magic); 3725 3726 // RDX:RAX = magic * numerator 3727 __ imulq(numerator); 3728 3729 if (imm > 0 && magic < 0) { 3730 // RDX += numerator 3731 __ addq(rdx, numerator); 3732 } else if (imm < 0 && magic > 0) { 3733 // RDX -= numerator 3734 __ subq(rdx, numerator); 3735 } 3736 3737 // Shift if needed. 3738 if (shift != 0) { 3739 __ sarq(rdx, Immediate(shift)); 3740 } 3741 3742 // RDX += 1 if RDX < 0 3743 __ movq(rax, rdx); 3744 __ shrq(rdx, Immediate(63)); 3745 __ addq(rdx, rax); 3746 3747 if (instruction->IsRem()) { 3748 __ movq(rax, numerator); 3749 3750 if (IsInt<32>(imm)) { 3751 __ imulq(rdx, Immediate(static_cast<int32_t>(imm))); 3752 } else { 3753 __ imulq(rdx, codegen_->LiteralInt64Address(imm)); 3754 } 3755 3756 __ subq(rax, rdx); 3757 __ movq(rdx, rax); 3758 } else { 3759 __ movq(rax, rdx); 3760 } 3761 } 3762 } 3763 3764 void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) { 3765 DCHECK(instruction->IsDiv() || instruction->IsRem()); 3766 DataType::Type type = instruction->GetResultType(); 3767 DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); 3768 3769 bool is_div = instruction->IsDiv(); 3770 LocationSummary* locations = instruction->GetLocations(); 3771 3772 CpuRegister out = locations->Out().AsRegister<CpuRegister>(); 3773 Location second = locations->InAt(1); 3774 3775 DCHECK_EQ(RAX, locations->InAt(0).AsRegister<CpuRegister>().AsRegister()); 3776 DCHECK_EQ(is_div ? RAX : RDX, out.AsRegister()); 3777 3778 if (second.IsConstant()) { 3779 int64_t imm = Int64FromConstant(second.GetConstant()); 3780 3781 if (imm == 0) { 3782 // Do not generate anything. DivZeroCheck would prevent any code to be executed. 
3783 } else if (imm == 1 || imm == -1) { 3784 DivRemOneOrMinusOne(instruction); 3785 } else if (IsPowerOfTwo(AbsOrMin(imm))) { 3786 if (is_div) { 3787 DivByPowerOfTwo(instruction->AsDiv()); 3788 } else { 3789 RemByPowerOfTwo(instruction->AsRem()); 3790 } 3791 } else { 3792 DCHECK(imm <= -2 || imm >= 2); 3793 GenerateDivRemWithAnyConstant(instruction); 3794 } 3795 } else { 3796 SlowPathCode* slow_path = 3797 new (codegen_->GetScopedAllocator()) DivRemMinusOneSlowPathX86_64( 3798 instruction, out.AsRegister(), type, is_div); 3799 codegen_->AddSlowPath(slow_path); 3800 3801 CpuRegister second_reg = second.AsRegister<CpuRegister>(); 3802 // 0x80000000(00000000)/-1 triggers an arithmetic exception! 3803 // Dividing by -1 is actually negation and -0x80000000(00000000) = 0x80000000(00000000) 3804 // so it's safe to just use negl/negq instead of more complex comparisons. 3805 if (type == DataType::Type::kInt32) { 3806 __ cmpl(second_reg, Immediate(-1)); 3807 __ j(kEqual, slow_path->GetEntryLabel()); 3808 // edx:eax <- sign-extended of eax 3809 __ cdq(); 3810 // eax = quotient, edx = remainder 3811 __ idivl(second_reg); 3812 } else { 3813 __ cmpq(second_reg, Immediate(-1)); 3814 __ j(kEqual, slow_path->GetEntryLabel()); 3815 // rdx:rax <- sign-extended of rax 3816 __ cqo(); 3817 // rax = quotient, rdx = remainder 3818 __ idivq(second_reg); 3819 } 3820 __ Bind(slow_path->GetExitLabel()); 3821 } 3822 } 3823 3824 void LocationsBuilderX86_64::VisitDiv(HDiv* div) { 3825 LocationSummary* locations = 3826 new (GetGraph()->GetAllocator()) LocationSummary(div, LocationSummary::kNoCall); 3827 switch (div->GetResultType()) { 3828 case DataType::Type::kInt32: 3829 case DataType::Type::kInt64: { 3830 locations->SetInAt(0, Location::RegisterLocation(RAX)); 3831 locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1))); 3832 locations->SetOut(Location::SameAsFirstInput()); 3833 // Intel uses edx:eax as the dividend. 3834 locations->AddTemp(Location::RegisterLocation(RDX)); 3835 // We need to save the numerator while we tweak rax and rdx. As we are using imul in a way 3836 // which enforces results to be in RAX and RDX, things are simpler if we use RDX also as 3837 // output and request another temp.
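      // (The extra temp requested below for constant divisors gives
      // GenerateDivRemWithAnyConstant a register in which to keep the original
      // numerator while RAX and RDX are clobbered by the magic-number sequence.)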
3838 if (div->InputAt(1)->IsConstant()) { 3839 locations->AddTemp(Location::RequiresRegister()); 3840 } 3841 break; 3842 } 3843 3844 case DataType::Type::kFloat32: 3845 case DataType::Type::kFloat64: { 3846 locations->SetInAt(0, Location::RequiresFpuRegister()); 3847 locations->SetInAt(1, Location::Any()); 3848 locations->SetOut(Location::SameAsFirstInput()); 3849 break; 3850 } 3851 3852 default: 3853 LOG(FATAL) << "Unexpected div type " << div->GetResultType(); 3854 } 3855 } 3856 3857 void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) { 3858 LocationSummary* locations = div->GetLocations(); 3859 Location first = locations->InAt(0); 3860 Location second = locations->InAt(1); 3861 DCHECK(first.Equals(locations->Out())); 3862 3863 DataType::Type type = div->GetResultType(); 3864 switch (type) { 3865 case DataType::Type::kInt32: 3866 case DataType::Type::kInt64: { 3867 GenerateDivRemIntegral(div); 3868 break; 3869 } 3870 3871 case DataType::Type::kFloat32: { 3872 if (second.IsFpuRegister()) { 3873 __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); 3874 } else if (second.IsConstant()) { 3875 __ divss(first.AsFpuRegister<XmmRegister>(), 3876 codegen_->LiteralFloatAddress( 3877 second.GetConstant()->AsFloatConstant()->GetValue())); 3878 } else { 3879 DCHECK(second.IsStackSlot()); 3880 __ divss(first.AsFpuRegister<XmmRegister>(), 3881 Address(CpuRegister(RSP), second.GetStackIndex())); 3882 } 3883 break; 3884 } 3885 3886 case DataType::Type::kFloat64: { 3887 if (second.IsFpuRegister()) { 3888 __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); 3889 } else if (second.IsConstant()) { 3890 __ divsd(first.AsFpuRegister<XmmRegister>(), 3891 codegen_->LiteralDoubleAddress( 3892 second.GetConstant()->AsDoubleConstant()->GetValue())); 3893 } else { 3894 DCHECK(second.IsDoubleStackSlot()); 3895 __ divsd(first.AsFpuRegister<XmmRegister>(), 3896 Address(CpuRegister(RSP), second.GetStackIndex())); 3897 } 3898 break; 3899 } 3900 3901 default: 3902 LOG(FATAL) << "Unexpected div type " << div->GetResultType(); 3903 } 3904 } 3905 3906 void LocationsBuilderX86_64::VisitRem(HRem* rem) { 3907 DataType::Type type = rem->GetResultType(); 3908 LocationSummary* locations = 3909 new (GetGraph()->GetAllocator()) LocationSummary(rem, LocationSummary::kNoCall); 3910 3911 switch (type) { 3912 case DataType::Type::kInt32: 3913 case DataType::Type::kInt64: { 3914 locations->SetInAt(0, Location::RegisterLocation(RAX)); 3915 locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1))); 3916 // Intel uses rdx:rax as the dividend and puts the remainder in rdx 3917 locations->SetOut(Location::RegisterLocation(RDX)); 3918 // We need to save the numerator while we tweak eax and edx. As we are using imul in a way 3919 // which enforces results to be in RAX and RDX, things are simpler if we use EAX also as 3920 // output and request another temp. 
3921 if (rem->InputAt(1)->IsConstant()) { 3922 locations->AddTemp(Location::RequiresRegister()); 3923 } 3924 break; 3925 } 3926 3927 case DataType::Type::kFloat32: 3928 case DataType::Type::kFloat64: { 3929 locations->SetInAt(0, Location::Any()); 3930 locations->SetInAt(1, Location::Any()); 3931 locations->SetOut(Location::RequiresFpuRegister()); 3932 locations->AddTemp(Location::RegisterLocation(RAX)); 3933 break; 3934 } 3935 3936 default: 3937 LOG(FATAL) << "Unexpected rem type " << type; 3938 } 3939 } 3940 3941 void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) { 3942 DataType::Type type = rem->GetResultType(); 3943 switch (type) { 3944 case DataType::Type::kInt32: 3945 case DataType::Type::kInt64: { 3946 GenerateDivRemIntegral(rem); 3947 break; 3948 } 3949 case DataType::Type::kFloat32: 3950 case DataType::Type::kFloat64: { 3951 GenerateRemFP(rem); 3952 break; 3953 } 3954 default: 3955 LOG(FATAL) << "Unexpected rem type " << rem->GetResultType(); 3956 } 3957 } 3958 3959 static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) { 3960 LocationSummary* locations = new (allocator) LocationSummary(minmax); 3961 switch (minmax->GetResultType()) { 3962 case DataType::Type::kInt32: 3963 case DataType::Type::kInt64: 3964 locations->SetInAt(0, Location::RequiresRegister()); 3965 locations->SetInAt(1, Location::RequiresRegister()); 3966 locations->SetOut(Location::SameAsFirstInput()); 3967 break; 3968 case DataType::Type::kFloat32: 3969 case DataType::Type::kFloat64: 3970 locations->SetInAt(0, Location::RequiresFpuRegister()); 3971 locations->SetInAt(1, Location::RequiresFpuRegister()); 3972 // The following is sub-optimal, but all we can do for now. It would be fine to also accept 3973 // the second input to be the output (we can simply swap inputs). 3974 locations->SetOut(Location::SameAsFirstInput()); 3975 break; 3976 default: 3977 LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType(); 3978 } 3979 } 3980 3981 void InstructionCodeGeneratorX86_64::GenerateMinMaxInt(LocationSummary* locations, 3982 bool is_min, 3983 DataType::Type type) { 3984 Location op1_loc = locations->InAt(0); 3985 Location op2_loc = locations->InAt(1); 3986 3987 // Shortcut for same input locations. 3988 if (op1_loc.Equals(op2_loc)) { 3989 // Can return immediately, as op1_loc == out_loc. 3990 // Note: if we ever support separate registers, e.g., output into memory, we need to check for 3991 // a copy here. 3992 DCHECK(locations->Out().Equals(op1_loc)); 3993 return; 3994 } 3995 3996 CpuRegister out = locations->Out().AsRegister<CpuRegister>(); 3997 CpuRegister op2 = op2_loc.AsRegister<CpuRegister>(); 3998 3999 // (out := op1) 4000 // out <=? op2 4001 // if out is min jmp done 4002 // out := op2 4003 // done: 4004 4005 if (type == DataType::Type::kInt64) { 4006 __ cmpq(out, op2); 4007 __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ true); 4008 } else { 4009 DCHECK_EQ(type, DataType::Type::kInt32); 4010 __ cmpl(out, op2); 4011 __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ false); 4012 } 4013 } 4014 4015 void InstructionCodeGeneratorX86_64::GenerateMinMaxFP(LocationSummary* locations, 4016 bool is_min, 4017 DataType::Type type) { 4018 Location op1_loc = locations->InAt(0); 4019 Location op2_loc = locations->InAt(1); 4020 Location out_loc = locations->Out(); 4021 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); 4022 4023 // Shortcut for same input locations. 
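  // (If the register allocator assigned both inputs to the same location, then
  // out == op1 == op2 already holds a valid answer, including for NaN and for
  // +/-0.0, so nothing needs to be emitted.)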
4024 if (op1_loc.Equals(op2_loc)) { 4025 DCHECK(out_loc.Equals(op1_loc)); 4026 return; 4027 } 4028 4029 // (out := op1) 4030 // out <=? op2 4031 // if Nan jmp Nan_label 4032 // if out is min jmp done 4033 // if op2 is min jmp op2_label 4034 // handle -0/+0 4035 // jmp done 4036 // Nan_label: 4037 // out := NaN 4038 // op2_label: 4039 // out := op2 4040 // done: 4041 // 4042 // This removes one jmp, but needs to copy one input (op1) to out. 4043 // 4044 // TODO: This is straight from Quick. Make NaN an out-of-line slowpath? 4045 4046 XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>(); 4047 4048 NearLabel nan, done, op2_label; 4049 if (type == DataType::Type::kFloat64) { 4050 __ ucomisd(out, op2); 4051 } else { 4052 DCHECK_EQ(type, DataType::Type::kFloat32); 4053 __ ucomiss(out, op2); 4054 } 4055 4056 __ j(Condition::kParityEven, &nan); 4057 4058 __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label); 4059 __ j(is_min ? Condition::kBelow : Condition::kAbove, &done); 4060 4061 // Handle 0.0/-0.0. 4062 if (is_min) { 4063 if (type == DataType::Type::kFloat64) { 4064 __ orpd(out, op2); 4065 } else { 4066 __ orps(out, op2); 4067 } 4068 } else { 4069 if (type == DataType::Type::kFloat64) { 4070 __ andpd(out, op2); 4071 } else { 4072 __ andps(out, op2); 4073 } 4074 } 4075 __ jmp(&done); 4076 4077 // NaN handling. 4078 __ Bind(&nan); 4079 if (type == DataType::Type::kFloat64) { 4080 __ movsd(out, codegen_->LiteralInt64Address(INT64_C(0x7FF8000000000000))); 4081 } else { 4082 __ movss(out, codegen_->LiteralInt32Address(INT32_C(0x7FC00000))); 4083 } 4084 __ jmp(&done); 4085 4086 // out := op2; 4087 __ Bind(&op2_label); 4088 if (type == DataType::Type::kFloat64) { 4089 __ movsd(out, op2); 4090 } else { 4091 __ movss(out, op2); 4092 } 4093 4094 // Done. 
4095 __ Bind(&done); 4096 } 4097 4098 void InstructionCodeGeneratorX86_64::GenerateMinMax(HBinaryOperation* minmax, bool is_min) { 4099 DataType::Type type = minmax->GetResultType(); 4100 switch (type) { 4101 case DataType::Type::kInt32: 4102 case DataType::Type::kInt64: 4103 GenerateMinMaxInt(minmax->GetLocations(), is_min, type); 4104 break; 4105 case DataType::Type::kFloat32: 4106 case DataType::Type::kFloat64: 4107 GenerateMinMaxFP(minmax->GetLocations(), is_min, type); 4108 break; 4109 default: 4110 LOG(FATAL) << "Unexpected type for HMinMax " << type; 4111 } 4112 } 4113 4114 void LocationsBuilderX86_64::VisitMin(HMin* min) { 4115 CreateMinMaxLocations(GetGraph()->GetAllocator(), min); 4116 } 4117 4118 void InstructionCodeGeneratorX86_64::VisitMin(HMin* min) { 4119 GenerateMinMax(min, /*is_min*/ true); 4120 } 4121 4122 void LocationsBuilderX86_64::VisitMax(HMax* max) { 4123 CreateMinMaxLocations(GetGraph()->GetAllocator(), max); 4124 } 4125 4126 void InstructionCodeGeneratorX86_64::VisitMax(HMax* max) { 4127 GenerateMinMax(max, /*is_min*/ false); 4128 } 4129 4130 void LocationsBuilderX86_64::VisitAbs(HAbs* abs) { 4131 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs); 4132 switch (abs->GetResultType()) { 4133 case DataType::Type::kInt32: 4134 case DataType::Type::kInt64: 4135 locations->SetInAt(0, Location::RequiresRegister()); 4136 locations->SetOut(Location::SameAsFirstInput()); 4137 locations->AddTemp(Location::RequiresRegister()); 4138 break; 4139 case DataType::Type::kFloat32: 4140 case DataType::Type::kFloat64: 4141 locations->SetInAt(0, Location::RequiresFpuRegister()); 4142 locations->SetOut(Location::SameAsFirstInput()); 4143 locations->AddTemp(Location::RequiresFpuRegister()); 4144 break; 4145 default: 4146 LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType(); 4147 } 4148 } 4149 4150 void InstructionCodeGeneratorX86_64::VisitAbs(HAbs* abs) { 4151 LocationSummary* locations = abs->GetLocations(); 4152 switch (abs->GetResultType()) { 4153 case DataType::Type::kInt32: { 4154 CpuRegister out = locations->Out().AsRegister<CpuRegister>(); 4155 CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>(); 4156 // Create mask. 4157 __ movl(mask, out); 4158 __ sarl(mask, Immediate(31)); 4159 // Add mask. 4160 __ addl(out, mask); 4161 __ xorl(out, mask); 4162 break; 4163 } 4164 case DataType::Type::kInt64: { 4165 CpuRegister out = locations->Out().AsRegister<CpuRegister>(); 4166 CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>(); 4167 // Create mask. 4168 __ movq(mask, out); 4169 __ sarq(mask, Immediate(63)); 4170 // Add mask. 
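      // abs(x) == (x + mask) ^ mask with mask = x >> 63:
      // e.g. x = -5: mask = -1, x + mask = -6, and -6 ^ -1 = 5;
      // for x >= 0 the mask is 0 and both operations are no-ops.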
4171 __ addq(out, mask); 4172 __ xorq(out, mask); 4173 break; 4174 } 4175 case DataType::Type::kFloat32: { 4176 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); 4177 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); 4178 __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x7FFFFFFF))); 4179 __ andps(out, mask); 4180 break; 4181 } 4182 case DataType::Type::kFloat64: { 4183 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); 4184 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); 4185 __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF))); 4186 __ andpd(out, mask); 4187 break; 4188 } 4189 default: 4190 LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType(); 4191 } 4192 } 4193 4194 void LocationsBuilderX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) { 4195 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); 4196 locations->SetInAt(0, Location::Any()); 4197 } 4198 4199 void InstructionCodeGeneratorX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) { 4200 SlowPathCode* slow_path = 4201 new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathX86_64(instruction); 4202 codegen_->AddSlowPath(slow_path); 4203 4204 LocationSummary* locations = instruction->GetLocations(); 4205 Location value = locations->InAt(0); 4206 4207 switch (instruction->GetType()) { 4208 case DataType::Type::kBool: 4209 case DataType::Type::kUint8: 4210 case DataType::Type::kInt8: 4211 case DataType::Type::kUint16: 4212 case DataType::Type::kInt16: 4213 case DataType::Type::kInt32: { 4214 if (value.IsRegister()) { 4215 __ testl(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>()); 4216 __ j(kEqual, slow_path->GetEntryLabel()); 4217 } else if (value.IsStackSlot()) { 4218 __ cmpl(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0)); 4219 __ j(kEqual, slow_path->GetEntryLabel()); 4220 } else { 4221 DCHECK(value.IsConstant()) << value; 4222 if (value.GetConstant()->AsIntConstant()->GetValue() == 0) { 4223 __ jmp(slow_path->GetEntryLabel()); 4224 } 4225 } 4226 break; 4227 } 4228 case DataType::Type::kInt64: { 4229 if (value.IsRegister()) { 4230 __ testq(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>()); 4231 __ j(kEqual, slow_path->GetEntryLabel()); 4232 } else if (value.IsDoubleStackSlot()) { 4233 __ cmpq(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0)); 4234 __ j(kEqual, slow_path->GetEntryLabel()); 4235 } else { 4236 DCHECK(value.IsConstant()) << value; 4237 if (value.GetConstant()->AsLongConstant()->GetValue() == 0) { 4238 __ jmp(slow_path->GetEntryLabel()); 4239 } 4240 } 4241 break; 4242 } 4243 default: 4244 LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType(); 4245 } 4246 } 4247 4248 void LocationsBuilderX86_64::HandleShift(HBinaryOperation* op) { 4249 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr()); 4250 4251 LocationSummary* locations = 4252 new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall); 4253 4254 switch (op->GetResultType()) { 4255 case DataType::Type::kInt32: 4256 case DataType::Type::kInt64: { 4257 locations->SetInAt(0, Location::RequiresRegister()); 4258 // The shift count needs to be in CL. 
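      // (x86 variable-count shifts only accept the count in CL; constant counts are
      // emitted as an immediate instead, masked to 31 or 63 to match the hardware
      // behavior.)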
4259 locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, op->InputAt(1))); 4260 locations->SetOut(Location::SameAsFirstInput()); 4261 break; 4262 } 4263 default: 4264 LOG(FATAL) << "Unexpected operation type " << op->GetResultType(); 4265 } 4266 } 4267 4268 void InstructionCodeGeneratorX86_64::HandleShift(HBinaryOperation* op) { 4269 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr()); 4270 4271 LocationSummary* locations = op->GetLocations(); 4272 CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>(); 4273 Location second = locations->InAt(1); 4274 4275 switch (op->GetResultType()) { 4276 case DataType::Type::kInt32: { 4277 if (second.IsRegister()) { 4278 CpuRegister second_reg = second.AsRegister<CpuRegister>(); 4279 if (op->IsShl()) { 4280 __ shll(first_reg, second_reg); 4281 } else if (op->IsShr()) { 4282 __ sarl(first_reg, second_reg); 4283 } else { 4284 __ shrl(first_reg, second_reg); 4285 } 4286 } else { 4287 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance); 4288 if (op->IsShl()) { 4289 __ shll(first_reg, imm); 4290 } else if (op->IsShr()) { 4291 __ sarl(first_reg, imm); 4292 } else { 4293 __ shrl(first_reg, imm); 4294 } 4295 } 4296 break; 4297 } 4298 case DataType::Type::kInt64: { 4299 if (second.IsRegister()) { 4300 CpuRegister second_reg = second.AsRegister<CpuRegister>(); 4301 if (op->IsShl()) { 4302 __ shlq(first_reg, second_reg); 4303 } else if (op->IsShr()) { 4304 __ sarq(first_reg, second_reg); 4305 } else { 4306 __ shrq(first_reg, second_reg); 4307 } 4308 } else { 4309 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance); 4310 if (op->IsShl()) { 4311 __ shlq(first_reg, imm); 4312 } else if (op->IsShr()) { 4313 __ sarq(first_reg, imm); 4314 } else { 4315 __ shrq(first_reg, imm); 4316 } 4317 } 4318 break; 4319 } 4320 default: 4321 LOG(FATAL) << "Unexpected operation type " << op->GetResultType(); 4322 UNREACHABLE(); 4323 } 4324 } 4325 4326 void LocationsBuilderX86_64::VisitRor(HRor* ror) { 4327 LocationSummary* locations = 4328 new (GetGraph()->GetAllocator()) LocationSummary(ror, LocationSummary::kNoCall); 4329 4330 switch (ror->GetResultType()) { 4331 case DataType::Type::kInt32: 4332 case DataType::Type::kInt64: { 4333 locations->SetInAt(0, Location::RequiresRegister()); 4334 // The shift count needs to be in CL (unless it is a constant). 
4335 locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, ror->InputAt(1))); 4336 locations->SetOut(Location::SameAsFirstInput()); 4337 break; 4338 } 4339 default: 4340 LOG(FATAL) << "Unexpected operation type " << ror->GetResultType(); 4341 UNREACHABLE(); 4342 } 4343 } 4344 4345 void InstructionCodeGeneratorX86_64::VisitRor(HRor* ror) { 4346 LocationSummary* locations = ror->GetLocations(); 4347 CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>(); 4348 Location second = locations->InAt(1); 4349 4350 switch (ror->GetResultType()) { 4351 case DataType::Type::kInt32: 4352 if (second.IsRegister()) { 4353 CpuRegister second_reg = second.AsRegister<CpuRegister>(); 4354 __ rorl(first_reg, second_reg); 4355 } else { 4356 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance); 4357 __ rorl(first_reg, imm); 4358 } 4359 break; 4360 case DataType::Type::kInt64: 4361 if (second.IsRegister()) { 4362 CpuRegister second_reg = second.AsRegister<CpuRegister>(); 4363 __ rorq(first_reg, second_reg); 4364 } else { 4365 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance); 4366 __ rorq(first_reg, imm); 4367 } 4368 break; 4369 default: 4370 LOG(FATAL) << "Unexpected operation type " << ror->GetResultType(); 4371 UNREACHABLE(); 4372 } 4373 } 4374 4375 void LocationsBuilderX86_64::VisitShl(HShl* shl) { 4376 HandleShift(shl); 4377 } 4378 4379 void InstructionCodeGeneratorX86_64::VisitShl(HShl* shl) { 4380 HandleShift(shl); 4381 } 4382 4383 void LocationsBuilderX86_64::VisitShr(HShr* shr) { 4384 HandleShift(shr); 4385 } 4386 4387 void InstructionCodeGeneratorX86_64::VisitShr(HShr* shr) { 4388 HandleShift(shr); 4389 } 4390 4391 void LocationsBuilderX86_64::VisitUShr(HUShr* ushr) { 4392 HandleShift(ushr); 4393 } 4394 4395 void InstructionCodeGeneratorX86_64::VisitUShr(HUShr* ushr) { 4396 HandleShift(ushr); 4397 } 4398 4399 void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) { 4400 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( 4401 instruction, LocationSummary::kCallOnMainOnly); 4402 InvokeRuntimeCallingConvention calling_convention; 4403 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); 4404 locations->SetOut(Location::RegisterLocation(RAX)); 4405 } 4406 4407 void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) { 4408 codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); 4409 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); 4410 DCHECK(!codegen_->IsLeafMethod()); 4411 } 4412 4413 void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) { 4414 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( 4415 instruction, LocationSummary::kCallOnMainOnly); 4416 InvokeRuntimeCallingConvention calling_convention; 4417 locations->SetOut(Location::RegisterLocation(RAX)); 4418 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); 4419 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); 4420 } 4421 4422 void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) { 4423 // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference. 
4424 QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction); 4425 codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc()); 4426 CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>(); 4427 DCHECK(!codegen_->IsLeafMethod()); 4428 } 4429 4430 void LocationsBuilderX86_64::VisitParameterValue(HParameterValue* instruction) { 4431 LocationSummary* locations = 4432 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); 4433 Location location = parameter_visitor_.GetNextLocation(instruction->GetType()); 4434 if (location.IsStackSlot()) { 4435 location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); 4436 } else if (location.IsDoubleStackSlot()) { 4437 location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); 4438 } 4439 locations->SetOut(location); 4440 } 4441 4442 void InstructionCodeGeneratorX86_64::VisitParameterValue( 4443 HParameterValue* instruction ATTRIBUTE_UNUSED) { 4444 // Nothing to do, the parameter is already at its location. 4445 } 4446 4447 void LocationsBuilderX86_64::VisitCurrentMethod(HCurrentMethod* instruction) { 4448 LocationSummary* locations = 4449 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); 4450 locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument)); 4451 } 4452 4453 void InstructionCodeGeneratorX86_64::VisitCurrentMethod( 4454 HCurrentMethod* instruction ATTRIBUTE_UNUSED) { 4455 // Nothing to do, the method is already at its location. 4456 } 4457 4458 void LocationsBuilderX86_64::VisitClassTableGet(HClassTableGet* instruction) { 4459 LocationSummary* locations = 4460 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); 4461 locations->SetInAt(0, Location::RequiresRegister()); 4462 locations->SetOut(Location::RequiresRegister()); 4463 } 4464 4465 void InstructionCodeGeneratorX86_64::VisitClassTableGet(HClassTableGet* instruction) { 4466 LocationSummary* locations = instruction->GetLocations(); 4467 if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) { 4468 uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset( 4469 instruction->GetIndex(), kX86_64PointerSize).SizeValue(); 4470 __ movq(locations->Out().AsRegister<CpuRegister>(), 4471 Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset)); 4472 } else { 4473 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement( 4474 instruction->GetIndex(), kX86_64PointerSize)); 4475 __ movq(locations->Out().AsRegister<CpuRegister>(), 4476 Address(locations->InAt(0).AsRegister<CpuRegister>(), 4477 mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value())); 4478 __ movq(locations->Out().AsRegister<CpuRegister>(), 4479 Address(locations->Out().AsRegister<CpuRegister>(), method_offset)); 4480 } 4481 } 4482 4483 void LocationsBuilderX86_64::VisitNot(HNot* not_) { 4484 LocationSummary* locations = 4485 new (GetGraph()->GetAllocator()) LocationSummary(not_, LocationSummary::kNoCall); 4486 locations->SetInAt(0, Location::RequiresRegister()); 4487 locations->SetOut(Location::SameAsFirstInput()); 4488 } 4489 4490 void InstructionCodeGeneratorX86_64::VisitNot(HNot* not_) { 4491 LocationSummary* locations = not_->GetLocations(); 4492 DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(), 4493 locations->Out().AsRegister<CpuRegister>().AsRegister()); 4494 Location out = locations->Out(); 4495 switch 
(not_->GetResultType()) { 4496 case DataType::Type::kInt32: 4497 __ notl(out.AsRegister<CpuRegister>()); 4498 break; 4499 4500 case DataType::Type::kInt64: 4501 __ notq(out.AsRegister<CpuRegister>()); 4502 break; 4503 4504 default: 4505 LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType(); 4506 } 4507 } 4508 4509 void LocationsBuilderX86_64::VisitBooleanNot(HBooleanNot* bool_not) { 4510 LocationSummary* locations = 4511 new (GetGraph()->GetAllocator()) LocationSummary(bool_not, LocationSummary::kNoCall); 4512 locations->SetInAt(0, Location::RequiresRegister()); 4513 locations->SetOut(Location::SameAsFirstInput()); 4514 } 4515 4516 void InstructionCodeGeneratorX86_64::VisitBooleanNot(HBooleanNot* bool_not) { 4517 LocationSummary* locations = bool_not->GetLocations(); 4518 DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(), 4519 locations->Out().AsRegister<CpuRegister>().AsRegister()); 4520 Location out = locations->Out(); 4521 __ xorl(out.AsRegister<CpuRegister>(), Immediate(1)); 4522 } 4523 4524 void LocationsBuilderX86_64::VisitPhi(HPhi* instruction) { 4525 LocationSummary* locations = 4526 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); 4527 for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) { 4528 locations->SetInAt(i, Location::Any()); 4529 } 4530 locations->SetOut(Location::Any()); 4531 } 4532 4533 void InstructionCodeGeneratorX86_64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) { 4534 LOG(FATAL) << "Unimplemented"; 4535 } 4536 4537 void CodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) { 4538 /* 4539 * According to the JSR-133 Cookbook, for x86-64 only StoreLoad/AnyAny barriers need memory fence. 4540 * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86-64 memory model. 4541 * For those cases, all we need to ensure is that there is a scheduling barrier in place. 4542 */ 4543 switch (kind) { 4544 case MemBarrierKind::kAnyAny: { 4545 MemoryFence(); 4546 break; 4547 } 4548 case MemBarrierKind::kAnyStore: 4549 case MemBarrierKind::kLoadAny: 4550 case MemBarrierKind::kStoreStore: { 4551 // nop 4552 break; 4553 } 4554 case MemBarrierKind::kNTStoreStore: 4555 // Non-Temporal Store/Store needs an explicit fence. 4556 MemoryFence(/* non-temporal= */ true); 4557 break; 4558 } 4559 } 4560 4561 void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) { 4562 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); 4563 4564 bool object_field_get_with_read_barrier = 4565 kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference); 4566 LocationSummary* locations = 4567 new (GetGraph()->GetAllocator()) LocationSummary(instruction, 4568 object_field_get_with_read_barrier 4569 ? LocationSummary::kCallOnSlowPath 4570 : LocationSummary::kNoCall); 4571 if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { 4572 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 4573 } 4574 locations->SetInAt(0, Location::RequiresRegister()); 4575 if (DataType::IsFloatingPointType(instruction->GetType())) { 4576 locations->SetOut(Location::RequiresFpuRegister()); 4577 } else { 4578 // The output overlaps for an object field get when read barriers 4579 // are enabled: we do not want the move to overwrite the object's 4580 // location, as we need it to emit the read barrier. 4581 locations->SetOut( 4582 Location::RequiresRegister(), 4583 object_field_get_with_read_barrier ? 
Location::kOutputOverlap : Location::kNoOutputOverlap); 4584 } 4585 } 4586 4587 void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction, 4588 const FieldInfo& field_info) { 4589 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); 4590 4591 LocationSummary* locations = instruction->GetLocations(); 4592 Location base_loc = locations->InAt(0); 4593 CpuRegister base = base_loc.AsRegister<CpuRegister>(); 4594 Location out = locations->Out(); 4595 bool is_volatile = field_info.IsVolatile(); 4596 DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType())); 4597 DataType::Type load_type = instruction->GetType(); 4598 uint32_t offset = field_info.GetFieldOffset().Uint32Value(); 4599 4600 switch (load_type) { 4601 case DataType::Type::kBool: 4602 case DataType::Type::kUint8: { 4603 __ movzxb(out.AsRegister<CpuRegister>(), Address(base, offset)); 4604 break; 4605 } 4606 4607 case DataType::Type::kInt8: { 4608 __ movsxb(out.AsRegister<CpuRegister>(), Address(base, offset)); 4609 break; 4610 } 4611 4612 case DataType::Type::kUint16: { 4613 __ movzxw(out.AsRegister<CpuRegister>(), Address(base, offset)); 4614 break; 4615 } 4616 4617 case DataType::Type::kInt16: { 4618 __ movsxw(out.AsRegister<CpuRegister>(), Address(base, offset)); 4619 break; 4620 } 4621 4622 case DataType::Type::kInt32: { 4623 __ movl(out.AsRegister<CpuRegister>(), Address(base, offset)); 4624 break; 4625 } 4626 4627 case DataType::Type::kReference: { 4628 // /* HeapReference<Object> */ out = *(base + offset) 4629 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { 4630 // Note that a potential implicit null check is handled in this 4631 // CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call. 4632 codegen_->GenerateFieldLoadWithBakerReadBarrier( 4633 instruction, out, base, offset, /* needs_null_check= */ true); 4634 if (is_volatile) { 4635 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); 4636 } 4637 } else { 4638 __ movl(out.AsRegister<CpuRegister>(), Address(base, offset)); 4639 codegen_->MaybeRecordImplicitNullCheck(instruction); 4640 if (is_volatile) { 4641 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); 4642 } 4643 // If read barriers are enabled, emit read barriers other than 4644 // Baker's using a slow path (and also unpoison the loaded 4645 // reference, if heap poisoning is enabled). 4646 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset); 4647 } 4648 break; 4649 } 4650 4651 case DataType::Type::kInt64: { 4652 __ movq(out.AsRegister<CpuRegister>(), Address(base, offset)); 4653 break; 4654 } 4655 4656 case DataType::Type::kFloat32: { 4657 __ movss(out.AsFpuRegister<XmmRegister>(), Address(base, offset)); 4658 break; 4659 } 4660 4661 case DataType::Type::kFloat64: { 4662 __ movsd(out.AsFpuRegister<XmmRegister>(), Address(base, offset)); 4663 break; 4664 } 4665 4666 case DataType::Type::kUint32: 4667 case DataType::Type::kUint64: 4668 case DataType::Type::kVoid: 4669 LOG(FATAL) << "Unreachable type " << load_type; 4670 UNREACHABLE(); 4671 } 4672 4673 if (load_type == DataType::Type::kReference) { 4674 // Potential implicit null checks, in the case of reference 4675 // fields, are handled in the previous switch statement. 
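    // (An implicit null check means no explicit test is emitted: the PC of the first
    // memory access through `base` is recorded, and if `base` is null that access
    // faults and the runtime's fault handler raises the NullPointerException.)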
4676 } else { 4677 codegen_->MaybeRecordImplicitNullCheck(instruction); 4678 } 4679 4680 if (is_volatile) { 4681 if (load_type == DataType::Type::kReference) { 4682 // Memory barriers, in the case of references, are also handled 4683 // in the previous switch statement. 4684 } else { 4685 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); 4686 } 4687 } 4688 } 4689 4690 void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction, 4691 const FieldInfo& field_info) { 4692 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); 4693 4694 LocationSummary* locations = 4695 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); 4696 DataType::Type field_type = field_info.GetFieldType(); 4697 bool is_volatile = field_info.IsVolatile(); 4698 bool needs_write_barrier = 4699 CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1)); 4700 4701 locations->SetInAt(0, Location::RequiresRegister()); 4702 if (DataType::IsFloatingPointType(instruction->InputAt(1)->GetType())) { 4703 if (is_volatile) { 4704 // In order to satisfy the semantics of volatile, this must be a single instruction store. 4705 locations->SetInAt(1, Location::FpuRegisterOrInt32Constant(instruction->InputAt(1))); 4706 } else { 4707 locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1))); 4708 } 4709 } else { 4710 if (is_volatile) { 4711 // In order to satisfy the semantics of volatile, this must be a single instruction store. 4712 locations->SetInAt(1, Location::RegisterOrInt32Constant(instruction->InputAt(1))); 4713 } else { 4714 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); 4715 } 4716 } 4717 if (needs_write_barrier) { 4718 // Temporary registers for the write barrier. 4719 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too. 4720 locations->AddTemp(Location::RequiresRegister()); 4721 } else if (kPoisonHeapReferences && field_type == DataType::Type::kReference) { 4722 // Temporary register for the reference poisoning. 
4723 locations->AddTemp(Location::RequiresRegister()); 4724 } 4725 } 4726 4727 void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, 4728 const FieldInfo& field_info, 4729 bool value_can_be_null) { 4730 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); 4731 4732 LocationSummary* locations = instruction->GetLocations(); 4733 CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>(); 4734 Location value = locations->InAt(1); 4735 bool is_volatile = field_info.IsVolatile(); 4736 DataType::Type field_type = field_info.GetFieldType(); 4737 uint32_t offset = field_info.GetFieldOffset().Uint32Value(); 4738 4739 if (is_volatile) { 4740 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore); 4741 } 4742 4743 bool maybe_record_implicit_null_check_done = false; 4744 4745 switch (field_type) { 4746 case DataType::Type::kBool: 4747 case DataType::Type::kUint8: 4748 case DataType::Type::kInt8: { 4749 if (value.IsConstant()) { 4750 __ movb(Address(base, offset), 4751 Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant()))); 4752 } else { 4753 __ movb(Address(base, offset), value.AsRegister<CpuRegister>()); 4754 } 4755 break; 4756 } 4757 4758 case DataType::Type::kUint16: 4759 case DataType::Type::kInt16: { 4760 if (value.IsConstant()) { 4761 __ movw(Address(base, offset), 4762 Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant()))); 4763 } else { 4764 __ movw(Address(base, offset), value.AsRegister<CpuRegister>()); 4765 } 4766 break; 4767 } 4768 4769 case DataType::Type::kInt32: 4770 case DataType::Type::kReference: { 4771 if (value.IsConstant()) { 4772 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant()); 4773 // `field_type == DataType::Type::kReference` implies `v == 0`. 4774 DCHECK((field_type != DataType::Type::kReference) || (v == 0)); 4775 // Note: if heap poisoning is enabled, no need to poison 4776 // (negate) `v` if it is a reference, as it would be null. 
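// (Illustrative aside, assuming the negation-based poisoning scheme used by PoisonHeapReference below: since -0 == 0, a null reference is unchanged by poisoning, which is why the constant store needs no extra work.)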
4777 __ movl(Address(base, offset), Immediate(v)); 4778 } else { 4779 if (kPoisonHeapReferences && field_type == DataType::Type::kReference) { 4780 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); 4781 __ movl(temp, value.AsRegister<CpuRegister>()); 4782 __ PoisonHeapReference(temp); 4783 __ movl(Address(base, offset), temp); 4784 } else { 4785 __ movl(Address(base, offset), value.AsRegister<CpuRegister>()); 4786 } 4787 } 4788 break; 4789 } 4790 4791 case DataType::Type::kInt64: { 4792 if (value.IsConstant()) { 4793 int64_t v = value.GetConstant()->AsLongConstant()->GetValue(); 4794 codegen_->MoveInt64ToAddress(Address(base, offset), 4795 Address(base, offset + sizeof(int32_t)), 4796 v, 4797 instruction); 4798 maybe_record_implicit_null_check_done = true; 4799 } else { 4800 __ movq(Address(base, offset), value.AsRegister<CpuRegister>()); 4801 } 4802 break; 4803 } 4804 4805 case DataType::Type::kFloat32: { 4806 if (value.IsConstant()) { 4807 int32_t v = 4808 bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue()); 4809 __ movl(Address(base, offset), Immediate(v)); 4810 } else { 4811 __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>()); 4812 } 4813 break; 4814 } 4815 4816 case DataType::Type::kFloat64: { 4817 if (value.IsConstant()) { 4818 int64_t v = 4819 bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue()); 4820 codegen_->MoveInt64ToAddress(Address(base, offset), 4821 Address(base, offset + sizeof(int32_t)), 4822 v, 4823 instruction); 4824 maybe_record_implicit_null_check_done = true; 4825 } else { 4826 __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>()); 4827 } 4828 break; 4829 } 4830 4831 case DataType::Type::kUint32: 4832 case DataType::Type::kUint64: 4833 case DataType::Type::kVoid: 4834 LOG(FATAL) << "Unreachable type " << field_type; 4835 UNREACHABLE(); 4836 } 4837 4838 if (!maybe_record_implicit_null_check_done) { 4839 codegen_->MaybeRecordImplicitNullCheck(instruction); 4840 } 4841 4842 if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { 4843 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); 4844 CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>(); 4845 codegen_->MarkGCCard(temp, card, base, value.AsRegister<CpuRegister>(), value_can_be_null); 4846 } 4847 4848 if (is_volatile) { 4849 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny); 4850 } 4851 } 4852 4853 void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { 4854 HandleFieldSet(instruction, instruction->GetFieldInfo()); 4855 } 4856 4857 void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { 4858 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); 4859 } 4860 4861 void LocationsBuilderX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { 4862 HandleFieldGet(instruction); 4863 } 4864 4865 void InstructionCodeGeneratorX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { 4866 HandleFieldGet(instruction, instruction->GetFieldInfo()); 4867 } 4868 4869 void LocationsBuilderX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) { 4870 HandleFieldGet(instruction); 4871 } 4872 4873 void InstructionCodeGeneratorX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) { 4874 HandleFieldGet(instruction, instruction->GetFieldInfo()); 4875 } 4876 4877 void LocationsBuilderX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) { 4878 
HandleFieldSet(instruction, instruction->GetFieldInfo()); 4879 } 4880 4881 void InstructionCodeGeneratorX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) { 4882 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); 4883 } 4884 4885 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldGet( 4886 HUnresolvedInstanceFieldGet* instruction) { 4887 FieldAccessCallingConventionX86_64 calling_convention; 4888 codegen_->CreateUnresolvedFieldLocationSummary( 4889 instruction, instruction->GetFieldType(), calling_convention); 4890 } 4891 4892 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldGet( 4893 HUnresolvedInstanceFieldGet* instruction) { 4894 FieldAccessCallingConventionX86_64 calling_convention; 4895 codegen_->GenerateUnresolvedFieldAccess(instruction, 4896 instruction->GetFieldType(), 4897 instruction->GetFieldIndex(), 4898 instruction->GetDexPc(), 4899 calling_convention); 4900 } 4901 4902 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldSet( 4903 HUnresolvedInstanceFieldSet* instruction) { 4904 FieldAccessCallingConventionX86_64 calling_convention; 4905 codegen_->CreateUnresolvedFieldLocationSummary( 4906 instruction, instruction->GetFieldType(), calling_convention); 4907 } 4908 4909 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldSet( 4910 HUnresolvedInstanceFieldSet* instruction) { 4911 FieldAccessCallingConventionX86_64 calling_convention; 4912 codegen_->GenerateUnresolvedFieldAccess(instruction, 4913 instruction->GetFieldType(), 4914 instruction->GetFieldIndex(), 4915 instruction->GetDexPc(), 4916 calling_convention); 4917 } 4918 4919 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldGet( 4920 HUnresolvedStaticFieldGet* instruction) { 4921 FieldAccessCallingConventionX86_64 calling_convention; 4922 codegen_->CreateUnresolvedFieldLocationSummary( 4923 instruction, instruction->GetFieldType(), calling_convention); 4924 } 4925 4926 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldGet( 4927 HUnresolvedStaticFieldGet* instruction) { 4928 FieldAccessCallingConventionX86_64 calling_convention; 4929 codegen_->GenerateUnresolvedFieldAccess(instruction, 4930 instruction->GetFieldType(), 4931 instruction->GetFieldIndex(), 4932 instruction->GetDexPc(), 4933 calling_convention); 4934 } 4935 4936 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldSet( 4937 HUnresolvedStaticFieldSet* instruction) { 4938 FieldAccessCallingConventionX86_64 calling_convention; 4939 codegen_->CreateUnresolvedFieldLocationSummary( 4940 instruction, instruction->GetFieldType(), calling_convention); 4941 } 4942 4943 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldSet( 4944 HUnresolvedStaticFieldSet* instruction) { 4945 FieldAccessCallingConventionX86_64 calling_convention; 4946 codegen_->GenerateUnresolvedFieldAccess(instruction, 4947 instruction->GetFieldType(), 4948 instruction->GetFieldIndex(), 4949 instruction->GetDexPc(), 4950 calling_convention); 4951 } 4952 4953 void LocationsBuilderX86_64::VisitNullCheck(HNullCheck* instruction) { 4954 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); 4955 Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks() 4956 ? 
Location::RequiresRegister() 4957 : Location::Any(); 4958 locations->SetInAt(0, loc); 4959 } 4960 4961 void CodeGeneratorX86_64::GenerateImplicitNullCheck(HNullCheck* instruction) { 4962 if (CanMoveNullCheckToUser(instruction)) { 4963 return; 4964 } 4965 LocationSummary* locations = instruction->GetLocations(); 4966 Location obj = locations->InAt(0); 4967 4968 __ testl(CpuRegister(RAX), Address(obj.AsRegister<CpuRegister>(), 0)); 4969 RecordPcInfo(instruction, instruction->GetDexPc()); 4970 } 4971 4972 void CodeGeneratorX86_64::GenerateExplicitNullCheck(HNullCheck* instruction) { 4973 SlowPathCode* slow_path = new (GetScopedAllocator()) NullCheckSlowPathX86_64(instruction); 4974 AddSlowPath(slow_path); 4975 4976 LocationSummary* locations = instruction->GetLocations(); 4977 Location obj = locations->InAt(0); 4978 4979 if (obj.IsRegister()) { 4980 __ testl(obj.AsRegister<CpuRegister>(), obj.AsRegister<CpuRegister>()); 4981 } else if (obj.IsStackSlot()) { 4982 __ cmpl(Address(CpuRegister(RSP), obj.GetStackIndex()), Immediate(0)); 4983 } else { 4984 DCHECK(obj.IsConstant()) << obj; 4985 DCHECK(obj.GetConstant()->IsNullConstant()); 4986 __ jmp(slow_path->GetEntryLabel()); 4987 return; 4988 } 4989 __ j(kEqual, slow_path->GetEntryLabel()); 4990 } 4991 4992 void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) { 4993 codegen_->GenerateNullCheck(instruction); 4994 } 4995 4996 void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) { 4997 bool object_array_get_with_read_barrier = 4998 kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference); 4999 LocationSummary* locations = 5000 new (GetGraph()->GetAllocator()) LocationSummary(instruction, 5001 object_array_get_with_read_barrier 5002 ? LocationSummary::kCallOnSlowPath 5003 : LocationSummary::kNoCall); 5004 if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { 5005 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 5006 } 5007 locations->SetInAt(0, Location::RequiresRegister()); 5008 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); 5009 if (DataType::IsFloatingPointType(instruction->GetType())) { 5010 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 5011 } else { 5012 // The output overlaps for an object array get when read barriers 5013 // are enabled: we do not want the move to overwrite the array's 5014 // location, as we need it to emit the read barrier. 5015 locations->SetOut( 5016 Location::RequiresRegister(), 5017 object_array_get_with_read_barrier ? 
Location::kOutputOverlap : Location::kNoOutputOverlap); 5018 } 5019 } 5020 5021 void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { 5022 LocationSummary* locations = instruction->GetLocations(); 5023 Location obj_loc = locations->InAt(0); 5024 CpuRegister obj = obj_loc.AsRegister<CpuRegister>(); 5025 Location index = locations->InAt(1); 5026 Location out_loc = locations->Out(); 5027 uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction); 5028 5029 DataType::Type type = instruction->GetType(); 5030 switch (type) { 5031 case DataType::Type::kBool: 5032 case DataType::Type::kUint8: { 5033 CpuRegister out = out_loc.AsRegister<CpuRegister>(); 5034 __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset)); 5035 break; 5036 } 5037 5038 case DataType::Type::kInt8: { 5039 CpuRegister out = out_loc.AsRegister<CpuRegister>(); 5040 __ movsxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset)); 5041 break; 5042 } 5043 5044 case DataType::Type::kUint16: { 5045 CpuRegister out = out_loc.AsRegister<CpuRegister>(); 5046 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { 5047 // Branch cases into compressed and uncompressed for each index's type. 5048 uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); 5049 NearLabel done, not_compressed; 5050 __ testb(Address(obj, count_offset), Immediate(1)); 5051 codegen_->MaybeRecordImplicitNullCheck(instruction); 5052 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, 5053 "Expecting 0=compressed, 1=uncompressed"); 5054 __ j(kNotZero, &not_compressed); 5055 __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset)); 5056 __ jmp(&done); 5057 __ Bind(&not_compressed); 5058 __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset)); 5059 __ Bind(&done); 5060 } else { 5061 __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset)); 5062 } 5063 break; 5064 } 5065 5066 case DataType::Type::kInt16: { 5067 CpuRegister out = out_loc.AsRegister<CpuRegister>(); 5068 __ movsxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset)); 5069 break; 5070 } 5071 5072 case DataType::Type::kInt32: { 5073 CpuRegister out = out_loc.AsRegister<CpuRegister>(); 5074 __ movl(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset)); 5075 break; 5076 } 5077 5078 case DataType::Type::kReference: { 5079 static_assert( 5080 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), 5081 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); 5082 // /* HeapReference<Object> */ out = 5083 // *(obj + data_offset + index * sizeof(HeapReference<Object>)) 5084 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { 5085 // Note that a potential implicit null check is handled in this 5086 // CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call. 5087 codegen_->GenerateArrayLoadWithBakerReadBarrier( 5088 instruction, out_loc, obj, data_offset, index, /* needs_null_check= */ true); 5089 } else { 5090 CpuRegister out = out_loc.AsRegister<CpuRegister>(); 5091 __ movl(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset)); 5092 codegen_->MaybeRecordImplicitNullCheck(instruction); 5093 // If read barriers are enabled, emit read barriers other than 5094 // Baker's using a slow path (and also unpoison the loaded 5095 // reference, if heap poisoning is enabled).
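// (Note for the constant-index case below: TIMES_4 is the log2 scale factor, i.e. 2, so `index << TIMES_4` folds the element offset to index * sizeof(HeapReference<Object>).)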
5096 if (index.IsConstant()) { 5097 uint32_t offset = 5098 (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; 5099 codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset); 5100 } else { 5101 codegen_->MaybeGenerateReadBarrierSlow( 5102 instruction, out_loc, out_loc, obj_loc, data_offset, index); 5103 } 5104 } 5105 break; 5106 } 5107 5108 case DataType::Type::kInt64: { 5109 CpuRegister out = out_loc.AsRegister<CpuRegister>(); 5110 __ movq(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_8, data_offset)); 5111 break; 5112 } 5113 5114 case DataType::Type::kFloat32: { 5115 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); 5116 __ movss(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset)); 5117 break; 5118 } 5119 5120 case DataType::Type::kFloat64: { 5121 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); 5122 __ movsd(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_8, data_offset)); 5123 break; 5124 } 5125 5126 case DataType::Type::kUint32: 5127 case DataType::Type::kUint64: 5128 case DataType::Type::kVoid: 5129 LOG(FATAL) << "Unreachable type " << type; 5130 UNREACHABLE(); 5131 } 5132 5133 if (type == DataType::Type::kReference) { 5134 // Potential implicit null checks, in the case of reference 5135 // arrays, are handled in the previous switch statement. 5136 } else { 5137 codegen_->MaybeRecordImplicitNullCheck(instruction); 5138 } 5139 } 5140 5141 void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) { 5142 DataType::Type value_type = instruction->GetComponentType(); 5143 5144 bool needs_write_barrier = 5145 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); 5146 bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); 5147 5148 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( 5149 instruction, 5150 may_need_runtime_call_for_type_check ? 5151 LocationSummary::kCallOnSlowPath : 5152 LocationSummary::kNoCall); 5153 5154 locations->SetInAt(0, Location::RequiresRegister()); 5155 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); 5156 if (DataType::IsFloatingPointType(value_type)) { 5157 locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2))); 5158 } else { 5159 locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2))); 5160 } 5161 5162 if (needs_write_barrier) { 5163 // Temporary registers for the write barrier. 5164 locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too. 
5165 locations->AddTemp(Location::RequiresRegister()); 5166 } 5167 } 5168 5169 void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { 5170 LocationSummary* locations = instruction->GetLocations(); 5171 Location array_loc = locations->InAt(0); 5172 CpuRegister array = array_loc.AsRegister<CpuRegister>(); 5173 Location index = locations->InAt(1); 5174 Location value = locations->InAt(2); 5175 DataType::Type value_type = instruction->GetComponentType(); 5176 bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); 5177 bool needs_write_barrier = 5178 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); 5179 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); 5180 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); 5181 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); 5182 5183 switch (value_type) { 5184 case DataType::Type::kBool: 5185 case DataType::Type::kUint8: 5186 case DataType::Type::kInt8: { 5187 uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value(); 5188 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_1, offset); 5189 if (value.IsRegister()) { 5190 __ movb(address, value.AsRegister<CpuRegister>()); 5191 } else { 5192 __ movb(address, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant()))); 5193 } 5194 codegen_->MaybeRecordImplicitNullCheck(instruction); 5195 break; 5196 } 5197 5198 case DataType::Type::kUint16: 5199 case DataType::Type::kInt16: { 5200 uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value(); 5201 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_2, offset); 5202 if (value.IsRegister()) { 5203 __ movw(address, value.AsRegister<CpuRegister>()); 5204 } else { 5205 DCHECK(value.IsConstant()) << value; 5206 __ movw(address, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant()))); 5207 } 5208 codegen_->MaybeRecordImplicitNullCheck(instruction); 5209 break; 5210 } 5211 5212 case DataType::Type::kReference: { 5213 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); 5214 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset); 5215 5216 if (!value.IsRegister()) { 5217 // Just setting null. 5218 DCHECK(instruction->InputAt(2)->IsNullConstant()); 5219 DCHECK(value.IsConstant()) << value; 5220 __ movl(address, Immediate(0)); 5221 codegen_->MaybeRecordImplicitNullCheck(instruction); 5222 DCHECK(!needs_write_barrier); 5223 DCHECK(!may_need_runtime_call_for_type_check); 5224 break; 5225 } 5226 5227 DCHECK(needs_write_barrier); 5228 CpuRegister register_value = value.AsRegister<CpuRegister>(); 5229 // We cannot use a NearLabel for `done`, as its range may be too 5230 // short when Baker read barriers are enabled. 
Label done; 5232 NearLabel not_null, do_put; 5233 SlowPathCode* slow_path = nullptr; 5234 Location temp_loc = locations->GetTemp(0); 5235 CpuRegister temp = temp_loc.AsRegister<CpuRegister>(); 5236 if (may_need_runtime_call_for_type_check) { 5237 slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathX86_64(instruction); 5238 codegen_->AddSlowPath(slow_path); 5239 if (instruction->GetValueCanBeNull()) { 5240 __ testl(register_value, register_value); 5241 __ j(kNotEqual, &not_null); 5242 __ movl(address, Immediate(0)); 5243 codegen_->MaybeRecordImplicitNullCheck(instruction); 5244 __ jmp(&done); 5245 __ Bind(&not_null); 5246 } 5247 5248 // Note that when Baker read barriers are enabled, the type 5249 // checks are performed without read barriers. This is fine, 5250 // even in the case where a class object is in the from-space 5251 // after the flip, as a comparison involving such a type would 5252 // not produce a false positive; it may of course produce a 5253 // false negative, in which case we would take the ArraySet 5254 // slow path. 5255 5256 // /* HeapReference<Class> */ temp = array->klass_ 5257 __ movl(temp, Address(array, class_offset)); 5258 codegen_->MaybeRecordImplicitNullCheck(instruction); 5259 __ MaybeUnpoisonHeapReference(temp); 5260 5261 // /* HeapReference<Class> */ temp = temp->component_type_ 5262 __ movl(temp, Address(temp, component_offset)); 5263 // If heap poisoning is enabled, no need to unpoison `temp` 5264 // nor the object reference in `register_value->klass`, as 5265 // we are comparing two poisoned references. 5266 __ cmpl(temp, Address(register_value, class_offset)); 5267 5268 if (instruction->StaticTypeOfArrayIsObjectArray()) { 5269 __ j(kEqual, &do_put); 5270 // If heap poisoning is enabled, the `temp` reference has 5271 // not been unpoisoned yet; unpoison it now. 5272 __ MaybeUnpoisonHeapReference(temp); 5273 5274 // If heap poisoning is enabled, no need to unpoison the 5275 // heap reference loaded below, as it is only used for a 5276 // comparison with null.
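// A component type whose super class is null can only be java.lang.Object, in which case any reference may be stored and the slow path can be skipped.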
5277 __ cmpl(Address(temp, super_offset), Immediate(0)); 5278 __ j(kNotEqual, slow_path->GetEntryLabel()); 5279 __ Bind(&do_put); 5280 } else { 5281 __ j(kNotEqual, slow_path->GetEntryLabel()); 5282 } 5283 } 5284 5285 if (kPoisonHeapReferences) { 5286 __ movl(temp, register_value); 5287 __ PoisonHeapReference(temp); 5288 __ movl(address, temp); 5289 } else { 5290 __ movl(address, register_value); 5291 } 5292 if (!may_need_runtime_call_for_type_check) { 5293 codegen_->MaybeRecordImplicitNullCheck(instruction); 5294 } 5295 5296 CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>(); 5297 codegen_->MarkGCCard( 5298 temp, card, array, value.AsRegister<CpuRegister>(), instruction->GetValueCanBeNull()); 5299 __ Bind(&done); 5300 5301 if (slow_path != nullptr) { 5302 __ Bind(slow_path->GetExitLabel()); 5303 } 5304 5305 break; 5306 } 5307 5308 case DataType::Type::kInt32: { 5309 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); 5310 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset); 5311 if (value.IsRegister()) { 5312 __ movl(address, value.AsRegister<CpuRegister>()); 5313 } else { 5314 DCHECK(value.IsConstant()) << value; 5315 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant()); 5316 __ movl(address, Immediate(v)); 5317 } 5318 codegen_->MaybeRecordImplicitNullCheck(instruction); 5319 break; 5320 } 5321 5322 case DataType::Type::kInt64: { 5323 uint32_t offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value(); 5324 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset); 5325 if (value.IsRegister()) { 5326 __ movq(address, value.AsRegister<CpuRegister>()); 5327 codegen_->MaybeRecordImplicitNullCheck(instruction); 5328 } else { 5329 int64_t v = value.GetConstant()->AsLongConstant()->GetValue(); 5330 Address address_high = 5331 CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t)); 5332 codegen_->MoveInt64ToAddress(address, address_high, v, instruction); 5333 } 5334 break; 5335 } 5336 5337 case DataType::Type::kFloat32: { 5338 uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value(); 5339 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset); 5340 if (value.IsFpuRegister()) { 5341 __ movss(address, value.AsFpuRegister<XmmRegister>()); 5342 } else { 5343 DCHECK(value.IsConstant()); 5344 int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue()); 5345 __ movl(address, Immediate(v)); 5346 } 5347 codegen_->MaybeRecordImplicitNullCheck(instruction); 5348 break; 5349 } 5350 5351 case DataType::Type::kFloat64: { 5352 uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value(); 5353 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset); 5354 if (value.IsFpuRegister()) { 5355 __ movsd(address, value.AsFpuRegister<XmmRegister>()); 5356 codegen_->MaybeRecordImplicitNullCheck(instruction); 5357 } else { 5358 int64_t v = 5359 bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue()); 5360 Address address_high = 5361 CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t)); 5362 codegen_->MoveInt64ToAddress(address, address_high, v, instruction); 5363 } 5364 break; 5365 } 5366 5367 case DataType::Type::kUint32: 5368 case DataType::Type::kUint64: 5369 case DataType::Type::kVoid: 5370 LOG(FATAL) << "Unreachable type " << instruction->GetType(); 5371 UNREACHABLE(); 5372 } 5373 } 5374 5375 void 
LocationsBuilderX86_64::VisitArrayLength(HArrayLength* instruction) { 5376 LocationSummary* locations = 5377 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); 5378 locations->SetInAt(0, Location::RequiresRegister()); 5379 if (!instruction->IsEmittedAtUseSite()) { 5380 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 5381 } 5382 } 5383 5384 void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction) { 5385 if (instruction->IsEmittedAtUseSite()) { 5386 return; 5387 } 5388 5389 LocationSummary* locations = instruction->GetLocations(); 5390 uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction); 5391 CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>(); 5392 CpuRegister out = locations->Out().AsRegister<CpuRegister>(); 5393 __ movl(out, Address(obj, offset)); 5394 codegen_->MaybeRecordImplicitNullCheck(instruction); 5395 // Mask out most significant bit in case the array is String's array of char. 5396 if (mirror::kUseStringCompression && instruction->IsStringLength()) { 5397 __ shrl(out, Immediate(1)); 5398 } 5399 } 5400 5401 void LocationsBuilderX86_64::VisitBoundsCheck(HBoundsCheck* instruction) { 5402 RegisterSet caller_saves = RegisterSet::Empty(); 5403 InvokeRuntimeCallingConvention calling_convention; 5404 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); 5405 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1))); 5406 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves); 5407 locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0))); 5408 HInstruction* length = instruction->InputAt(1); 5409 if (!length->IsEmittedAtUseSite()) { 5410 locations->SetInAt(1, Location::RegisterOrConstant(length)); 5411 } 5412 } 5413 5414 void InstructionCodeGeneratorX86_64::VisitBoundsCheck(HBoundsCheck* instruction) { 5415 LocationSummary* locations = instruction->GetLocations(); 5416 Location index_loc = locations->InAt(0); 5417 Location length_loc = locations->InAt(1); 5418 SlowPathCode* slow_path = 5419 new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathX86_64(instruction); 5420 5421 if (length_loc.IsConstant()) { 5422 int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant()); 5423 if (index_loc.IsConstant()) { 5424 // BCE will remove the bounds check if we are guaranteed to pass. 5425 int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant()); 5426 if (index < 0 || index >= length) { 5427 codegen_->AddSlowPath(slow_path); 5428 __ jmp(slow_path->GetEntryLabel()); 5429 } else { 5430 // Some optimization after BCE may have generated this, and we should not 5431 // generate a bounds check if it is a valid range. 5432 } 5433 return; 5434 } 5435 5436 // We have to reverse the jump condition because the length is the constant. 5437 CpuRegister index_reg = index_loc.AsRegister<CpuRegister>(); 5438 __ cmpl(index_reg, Immediate(length)); 5439 codegen_->AddSlowPath(slow_path); 5440 __ j(kAboveEqual, slow_path->GetEntryLabel()); 5441 } else { 5442 HInstruction* array_length = instruction->InputAt(1); 5443 if (array_length->IsEmittedAtUseSite()) { 5444 // Address the length field in the array.
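// (Using the in-memory length as a direct memory operand in the compare below avoids materializing it in a register, except in the String compression case, which needs TMP to shift out the compression flag.)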
DCHECK(array_length->IsArrayLength()); 5446 uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength()); 5447 Location array_loc = array_length->GetLocations()->InAt(0); 5448 Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset); 5449 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { 5450 // TODO: if index_loc.IsConstant(), compare twice the index (to compensate for 5451 // the string compression flag) with the in-memory length and avoid the temporary. 5452 CpuRegister length_reg = CpuRegister(TMP); 5453 __ movl(length_reg, array_len); 5454 codegen_->MaybeRecordImplicitNullCheck(array_length); 5455 __ shrl(length_reg, Immediate(1)); 5456 codegen_->GenerateIntCompare(length_reg, index_loc); 5457 } else { 5458 // Checking the bound for general case: 5459 // Array of char or String's array when the compression feature is off. 5460 if (index_loc.IsConstant()) { 5461 int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant()); 5462 __ cmpl(array_len, Immediate(value)); 5463 } else { 5464 __ cmpl(array_len, index_loc.AsRegister<CpuRegister>()); 5465 } 5466 codegen_->MaybeRecordImplicitNullCheck(array_length); 5467 } 5468 } else { 5469 codegen_->GenerateIntCompare(length_loc, index_loc); 5470 } 5471 codegen_->AddSlowPath(slow_path); 5472 __ j(kBelowEqual, slow_path->GetEntryLabel()); 5473 } 5474 } 5475 5476 void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp, 5477 CpuRegister card, 5478 CpuRegister object, 5479 CpuRegister value, 5480 bool value_can_be_null) { 5481 NearLabel is_null; 5482 if (value_can_be_null) { 5483 __ testl(value, value); 5484 __ j(kEqual, &is_null); 5485 } 5486 // Load the address of the card table into `card`. 5487 __ gs()->movq(card, Address::Absolute(Thread::CardTableOffset<kX86_64PointerSize>().Int32Value(), 5488 /* no_rip= */ true)); 5489 // Calculate the offset (in the card table) of the card corresponding to 5490 // `object`. 5491 __ movq(temp, object); 5492 __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift)); 5493 // Write the `art::gc::accounting::CardTable::kCardDirty` value into the 5494 // `object`'s card. 5495 // 5496 // Register `card` contains the address of the card table. Note that the card 5497 // table's base is biased during its creation so that it always starts at an 5498 // address whose least-significant byte is equal to `kCardDirty` (see 5499 // art::gc::accounting::CardTable::Create). Therefore the MOVB instruction 5500 // below writes the `kCardDirty` (byte) value into the `object`'s card 5501 // (located at `card + object >> kCardShift`). 5502 // 5503 // This dual use of the value in register `card` (1. to calculate the location 5504 // of the card to mark; and 2. to load the `kCardDirty` value) saves a load 5505 // (no need to explicitly load `kCardDirty` as an immediate value). 5506 __ movb(Address(temp, card, TIMES_1, 0), card); 5507 if (value_can_be_null) { 5508 __ Bind(&is_null); 5509 } 5510 } 5511 5512 void LocationsBuilderX86_64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) { 5513 LOG(FATAL) << "Unimplemented"; 5514 } 5515 5516 void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instruction) { 5517 if (instruction->GetNext()->IsSuspendCheck() && 5518 instruction->GetBlock()->GetLoopInformation() != nullptr) { 5519 HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck(); 5520 // The back edge will generate the suspend check.
5521 codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction); 5522 } 5523 5524 codegen_->GetMoveResolver()->EmitNativeCode(instruction); 5525 } 5526 5527 void LocationsBuilderX86_64::VisitSuspendCheck(HSuspendCheck* instruction) { 5528 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( 5529 instruction, LocationSummary::kCallOnSlowPath); 5530 // In suspend check slow path, usually there are no caller-save registers at all. 5531 // If SIMD instructions are present, however, we force spilling all live SIMD 5532 // registers in full width (since the runtime only saves/restores lower part). 5533 locations->SetCustomSlowPathCallerSaves( 5534 GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty()); 5535 } 5536 5537 void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instruction) { 5538 HBasicBlock* block = instruction->GetBlock(); 5539 if (block->GetLoopInformation() != nullptr) { 5540 DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction); 5541 // The back edge will generate the suspend check. 5542 return; 5543 } 5544 if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) { 5545 // The goto will generate the suspend check. 5546 return; 5547 } 5548 GenerateSuspendCheck(instruction, nullptr); 5549 } 5550 5551 void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruction, 5552 HBasicBlock* successor) { 5553 SuspendCheckSlowPathX86_64* slow_path = 5554 down_cast<SuspendCheckSlowPathX86_64*>(instruction->GetSlowPath()); 5555 if (slow_path == nullptr) { 5556 slow_path = 5557 new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathX86_64(instruction, successor); 5558 instruction->SetSlowPath(slow_path); 5559 codegen_->AddSlowPath(slow_path); 5560 if (successor != nullptr) { 5561 DCHECK(successor->IsLoopHeader()); 5562 } 5563 } else { 5564 DCHECK_EQ(slow_path->GetSuccessor(), successor); 5565 } 5566 5567 __ gs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64PointerSize>().Int32Value(), 5568 /* no_rip= */ true), 5569 Immediate(0)); 5570 if (successor == nullptr) { 5571 __ j(kNotEqual, slow_path->GetEntryLabel()); 5572 __ Bind(slow_path->GetReturnLabel()); 5573 } else { 5574 __ j(kEqual, codegen_->GetLabelOf(successor)); 5575 __ jmp(slow_path->GetEntryLabel()); 5576 } 5577 } 5578 5579 X86_64Assembler* ParallelMoveResolverX86_64::GetAssembler() const { 5580 return codegen_->GetAssembler(); 5581 } 5582 5583 void ParallelMoveResolverX86_64::EmitMove(size_t index) { 5584 MoveOperands* move = moves_[index]; 5585 Location source = move->GetSource(); 5586 Location destination = move->GetDestination(); 5587 5588 if (source.IsRegister()) { 5589 if (destination.IsRegister()) { 5590 __ movq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>()); 5591 } else if (destination.IsStackSlot()) { 5592 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), 5593 source.AsRegister<CpuRegister>()); 5594 } else { 5595 DCHECK(destination.IsDoubleStackSlot()); 5596 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), 5597 source.AsRegister<CpuRegister>()); 5598 } 5599 } else if (source.IsStackSlot()) { 5600 if (destination.IsRegister()) { 5601 __ movl(destination.AsRegister<CpuRegister>(), 5602 Address(CpuRegister(RSP), source.GetStackIndex())); 5603 } else if (destination.IsFpuRegister()) { 5604 __ movss(destination.AsFpuRegister<XmmRegister>(), 5605 Address(CpuRegister(RSP), source.GetStackIndex())); 5606 } else { 5607 DCHECK(destination.IsStackSlot()); 
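// Illustrative sketch (pseudo-assembly): x86-64 has no memory-to-memory move, so the stack-to-stack cases below stage the value through the reserved scratch register TMP, roughly: movl TMP, [rsp + src]; movl [rsp + dst], TMP.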
5608 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex())); 5609 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); 5610 } 5611 } else if (source.IsDoubleStackSlot()) { 5612 if (destination.IsRegister()) { 5613 __ movq(destination.AsRegister<CpuRegister>(), 5614 Address(CpuRegister(RSP), source.GetStackIndex())); 5615 } else if (destination.IsFpuRegister()) { 5616 __ movsd(destination.AsFpuRegister<XmmRegister>(), 5617 Address(CpuRegister(RSP), source.GetStackIndex())); 5618 } else { 5619 DCHECK(destination.IsDoubleStackSlot()) << destination; 5620 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex())); 5621 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); 5622 } 5623 } else if (source.IsSIMDStackSlot()) { 5624 if (destination.IsFpuRegister()) { 5625 __ movups(destination.AsFpuRegister<XmmRegister>(), 5626 Address(CpuRegister(RSP), source.GetStackIndex())); 5627 } else { 5628 DCHECK(destination.IsSIMDStackSlot()); 5629 size_t high = kX86_64WordSize; 5630 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex())); 5631 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); 5632 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex() + high)); 5633 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex() + high), CpuRegister(TMP)); 5634 } 5635 } else if (source.IsConstant()) { 5636 HConstant* constant = source.GetConstant(); 5637 if (constant->IsIntConstant() || constant->IsNullConstant()) { 5638 int32_t value = CodeGenerator::GetInt32ValueOf(constant); 5639 if (destination.IsRegister()) { 5640 if (value == 0) { 5641 __ xorl(destination.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>()); 5642 } else { 5643 __ movl(destination.AsRegister<CpuRegister>(), Immediate(value)); 5644 } 5645 } else { 5646 DCHECK(destination.IsStackSlot()) << destination; 5647 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value)); 5648 } 5649 } else if (constant->IsLongConstant()) { 5650 int64_t value = constant->AsLongConstant()->GetValue(); 5651 if (destination.IsRegister()) { 5652 codegen_->Load64BitValue(destination.AsRegister<CpuRegister>(), value); 5653 } else { 5654 DCHECK(destination.IsDoubleStackSlot()) << destination; 5655 codegen_->Store64BitValueToStack(destination, value); 5656 } 5657 } else if (constant->IsFloatConstant()) { 5658 float fp_value = constant->AsFloatConstant()->GetValue(); 5659 if (destination.IsFpuRegister()) { 5660 XmmRegister dest = destination.AsFpuRegister<XmmRegister>(); 5661 codegen_->Load32BitValue(dest, fp_value); 5662 } else { 5663 DCHECK(destination.IsStackSlot()) << destination; 5664 Immediate imm(bit_cast<int32_t, float>(fp_value)); 5665 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm); 5666 } 5667 } else { 5668 DCHECK(constant->IsDoubleConstant()) << constant->DebugName(); 5669 double fp_value = constant->AsDoubleConstant()->GetValue(); 5670 int64_t value = bit_cast<int64_t, double>(fp_value); 5671 if (destination.IsFpuRegister()) { 5672 XmmRegister dest = destination.AsFpuRegister<XmmRegister>(); 5673 codegen_->Load64BitValue(dest, fp_value); 5674 } else { 5675 DCHECK(destination.IsDoubleStackSlot()) << destination; 5676 codegen_->Store64BitValueToStack(destination, value); 5677 } 5678 } 5679 } else if (source.IsFpuRegister()) { 5680 if (destination.IsFpuRegister()) { 5681 __ movaps(destination.AsFpuRegister<XmmRegister>(), 
source.AsFpuRegister<XmmRegister>()); 5682 } else if (destination.IsStackSlot()) { 5683 __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()), 5684 source.AsFpuRegister<XmmRegister>()); 5685 } else if (destination.IsDoubleStackSlot()) { 5686 __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()), 5687 source.AsFpuRegister<XmmRegister>()); 5688 } else { 5689 DCHECK(destination.IsSIMDStackSlot()); 5690 __ movups(Address(CpuRegister(RSP), destination.GetStackIndex()), 5691 source.AsFpuRegister<XmmRegister>()); 5692 } 5693 } 5694 } 5695 5696 void ParallelMoveResolverX86_64::Exchange32(CpuRegister reg, int mem) { 5697 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem)); 5698 __ movl(Address(CpuRegister(RSP), mem), reg); 5699 __ movl(reg, CpuRegister(TMP)); 5700 } 5701 5702 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg1, CpuRegister reg2) { 5703 __ movq(CpuRegister(TMP), reg1); 5704 __ movq(reg1, reg2); 5705 __ movq(reg2, CpuRegister(TMP)); 5706 } 5707 5708 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) { 5709 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem)); 5710 __ movq(Address(CpuRegister(RSP), mem), reg); 5711 __ movq(reg, CpuRegister(TMP)); 5712 } 5713 5714 void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) { 5715 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem)); 5716 __ movss(Address(CpuRegister(RSP), mem), reg); 5717 __ movd(reg, CpuRegister(TMP)); 5718 } 5719 5720 void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) { 5721 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem)); 5722 __ movsd(Address(CpuRegister(RSP), mem), reg); 5723 __ movd(reg, CpuRegister(TMP)); 5724 } 5725 5726 void ParallelMoveResolverX86_64::Exchange128(XmmRegister reg, int mem) { 5727 size_t extra_slot = 2 * kX86_64WordSize; 5728 __ subq(CpuRegister(RSP), Immediate(extra_slot)); 5729 __ movups(Address(CpuRegister(RSP), 0), XmmRegister(reg)); 5730 ExchangeMemory64(0, mem + extra_slot, 2); 5731 __ movups(XmmRegister(reg), Address(CpuRegister(RSP), 0)); 5732 __ addq(CpuRegister(RSP), Immediate(extra_slot)); 5733 } 5734 5735 void ParallelMoveResolverX86_64::ExchangeMemory32(int mem1, int mem2) { 5736 ScratchRegisterScope ensure_scratch( 5737 this, TMP, RAX, codegen_->GetNumberOfCoreRegisters()); 5738 5739 int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0; 5740 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset)); 5741 __ movl(CpuRegister(ensure_scratch.GetRegister()), 5742 Address(CpuRegister(RSP), mem2 + stack_offset)); 5743 __ movl(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP)); 5744 __ movl(Address(CpuRegister(RSP), mem1 + stack_offset), 5745 CpuRegister(ensure_scratch.GetRegister())); 5746 } 5747 5748 void ParallelMoveResolverX86_64::ExchangeMemory64(int mem1, int mem2, int num_of_qwords) { 5749 ScratchRegisterScope ensure_scratch( 5750 this, TMP, RAX, codegen_->GetNumberOfCoreRegisters()); 5751 5752 int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0; 5753 5754 // Now that temp registers are available (possibly spilled), exchange blocks of memory. 
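// Rough pseudo-assembly for one qword of the exchange (assuming the extra scratch register did not need to be spilled, so stack_offset starts at 0): movq TMP, [rsp + mem1]; movq scratch, [rsp + mem2]; movq [rsp + mem2], TMP; movq [rsp + mem1], scratch; both offsets then advance by 8 for the next qword.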
5755 for (int i = 0; i < num_of_qwords; i++) { 5756 __ movq(CpuRegister(TMP), 5757 Address(CpuRegister(RSP), mem1 + stack_offset)); 5758 __ movq(CpuRegister(ensure_scratch.GetRegister()), 5759 Address(CpuRegister(RSP), mem2 + stack_offset)); 5760 __ movq(Address(CpuRegister(RSP), mem2 + stack_offset), 5761 CpuRegister(TMP)); 5762 __ movq(Address(CpuRegister(RSP), mem1 + stack_offset), 5763 CpuRegister(ensure_scratch.GetRegister())); 5764 stack_offset += kX86_64WordSize; 5765 } 5766 } 5767 5768 void ParallelMoveResolverX86_64::EmitSwap(size_t index) { 5769 MoveOperands* move = moves_[index]; 5770 Location source = move->GetSource(); 5771 Location destination = move->GetDestination(); 5772 5773 if (source.IsRegister() && destination.IsRegister()) { 5774 Exchange64(source.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>()); 5775 } else if (source.IsRegister() && destination.IsStackSlot()) { 5776 Exchange32(source.AsRegister<CpuRegister>(), destination.GetStackIndex()); 5777 } else if (source.IsStackSlot() && destination.IsRegister()) { 5778 Exchange32(destination.AsRegister<CpuRegister>(), source.GetStackIndex()); 5779 } else if (source.IsStackSlot() && destination.IsStackSlot()) { 5780 ExchangeMemory32(destination.GetStackIndex(), source.GetStackIndex()); 5781 } else if (source.IsRegister() && destination.IsDoubleStackSlot()) { 5782 Exchange64(source.AsRegister<CpuRegister>(), destination.GetStackIndex()); 5783 } else if (source.IsDoubleStackSlot() && destination.IsRegister()) { 5784 Exchange64(destination.AsRegister<CpuRegister>(), source.GetStackIndex()); 5785 } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) { 5786 ExchangeMemory64(destination.GetStackIndex(), source.GetStackIndex(), 1); 5787 } else if (source.IsFpuRegister() && destination.IsFpuRegister()) { 5788 __ movd(CpuRegister(TMP), source.AsFpuRegister<XmmRegister>()); 5789 __ movaps(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>()); 5790 __ movd(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP)); 5791 } else if (source.IsFpuRegister() && destination.IsStackSlot()) { 5792 Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex()); 5793 } else if (source.IsStackSlot() && destination.IsFpuRegister()) { 5794 Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex()); 5795 } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) { 5796 Exchange64(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex()); 5797 } else if (source.IsDoubleStackSlot() && destination.IsFpuRegister()) { 5798 Exchange64(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex()); 5799 } else if (source.IsSIMDStackSlot() && destination.IsSIMDStackSlot()) { 5800 ExchangeMemory64(destination.GetStackIndex(), source.GetStackIndex(), 2); 5801 } else if (source.IsFpuRegister() && destination.IsSIMDStackSlot()) { 5802 Exchange128(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex()); 5803 } else if (destination.IsFpuRegister() && source.IsSIMDStackSlot()) { 5804 Exchange128(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex()); 5805 } else { 5806 LOG(FATAL) << "Unimplemented swap between " << source << " and " << destination; 5807 } 5808 } 5809 5810 5811 void ParallelMoveResolverX86_64::SpillScratch(int reg) { 5812 __ pushq(CpuRegister(reg)); 5813 } 5814 5815 5816 void ParallelMoveResolverX86_64::RestoreScratch(int reg) { 5817 __ popq(CpuRegister(reg)); 5818 } 5819 5820 void 
InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck( 5821 SlowPathCode* slow_path, CpuRegister class_reg) { 5822 constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf(); 5823 const size_t status_byte_offset = 5824 mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte); 5825 constexpr uint32_t shifted_initialized_value = 5826 enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte); 5827 5828 __ cmpb(Address(class_reg, status_byte_offset), Immediate(shifted_initialized_value)); 5829 __ j(kBelow, slow_path->GetEntryLabel()); 5830 __ Bind(slow_path->GetExitLabel()); 5831 // No need for memory fence, thanks to the x86-64 memory model. 5832 } 5833 5834 void InstructionCodeGeneratorX86_64::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, 5835 CpuRegister temp) { 5836 uint32_t path_to_root = check->GetBitstringPathToRoot(); 5837 uint32_t mask = check->GetBitstringMask(); 5838 DCHECK(IsPowerOfTwo(mask + 1)); 5839 size_t mask_bits = WhichPowerOf2(mask + 1); 5840 5841 if (mask_bits == 16u) { 5842 // Compare the bitstring in memory. 5843 __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root)); 5844 } else { 5845 // /* uint32_t */ temp = temp->status_ 5846 __ movl(temp, Address(temp, mirror::Class::StatusOffset())); 5847 // Compare the bitstring bits using SUB. 5848 __ subl(temp, Immediate(path_to_root)); 5849 // Shift out bits that do not contribute to the comparison. 5850 __ shll(temp, Immediate(32u - mask_bits)); 5851 } 5852 } 5853 5854 HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind( 5855 HLoadClass::LoadKind desired_class_load_kind) { 5856 switch (desired_class_load_kind) { 5857 case HLoadClass::LoadKind::kInvalid: 5858 LOG(FATAL) << "UNREACHABLE"; 5859 UNREACHABLE(); 5860 case HLoadClass::LoadKind::kReferrersClass: 5861 break; 5862 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: 5863 case HLoadClass::LoadKind::kBootImageRelRo: 5864 case HLoadClass::LoadKind::kBssEntry: 5865 DCHECK(!Runtime::Current()->UseJitCompilation()); 5866 break; 5867 case HLoadClass::LoadKind::kJitBootImageAddress: 5868 case HLoadClass::LoadKind::kJitTableAddress: 5869 DCHECK(Runtime::Current()->UseJitCompilation()); 5870 break; 5871 case HLoadClass::LoadKind::kRuntimeCall: 5872 break; 5873 } 5874 return desired_class_load_kind; 5875 } 5876 5877 void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) { 5878 HLoadClass::LoadKind load_kind = cls->GetLoadKind(); 5879 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { 5880 // Custom calling convention: RAX serves as both input and output. 5881 CodeGenerator::CreateLoadClassRuntimeCallLocationSummary( 5882 cls, 5883 Location::RegisterLocation(RAX), 5884 Location::RegisterLocation(RAX)); 5885 return; 5886 } 5887 DCHECK(!cls->NeedsAccessCheck()); 5888 5889 const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage(); 5890 LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier) 5891 ? LocationSummary::kCallOnSlowPath 5892 : LocationSummary::kNoCall; 5893 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind); 5894 if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) { 5895 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 
5896 } 5897 5898 if (load_kind == HLoadClass::LoadKind::kReferrersClass) { 5899 locations->SetInAt(0, Location::RequiresRegister()); 5900 } 5901 locations->SetOut(Location::RequiresRegister()); 5902 if (load_kind == HLoadClass::LoadKind::kBssEntry) { 5903 if (!kUseReadBarrier || kUseBakerReadBarrier) { 5904 // Rely on the type resolution and/or initialization to save everything. 5905 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); 5906 } else { 5907 // For non-Baker read barrier we have a temp-clobbering call. 5908 } 5909 } 5910 } 5911 5912 Label* CodeGeneratorX86_64::NewJitRootClassPatch(const DexFile& dex_file, 5913 dex::TypeIndex type_index, 5914 Handle<mirror::Class> handle) { 5915 ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle); 5916 // Add a patch entry and return the label. 5917 jit_class_patches_.emplace_back(&dex_file, type_index.index_); 5918 PatchInfo<Label>* info = &jit_class_patches_.back(); 5919 return &info->label; 5920 } 5921 5922 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not 5923 // move. 5924 void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS { 5925 HLoadClass::LoadKind load_kind = cls->GetLoadKind(); 5926 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { 5927 codegen_->GenerateLoadClassRuntimeCall(cls); 5928 return; 5929 } 5930 DCHECK(!cls->NeedsAccessCheck()); 5931 5932 LocationSummary* locations = cls->GetLocations(); 5933 Location out_loc = locations->Out(); 5934 CpuRegister out = out_loc.AsRegister<CpuRegister>(); 5935 5936 const ReadBarrierOption read_barrier_option = cls->IsInBootImage() 5937 ? kWithoutReadBarrier 5938 : kCompilerReadBarrierOption; 5939 bool generate_null_check = false; 5940 switch (load_kind) { 5941 case HLoadClass::LoadKind::kReferrersClass: { 5942 DCHECK(!cls->CanCallRuntime()); 5943 DCHECK(!cls->MustGenerateClinitCheck()); 5944 // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ 5945 CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>(); 5946 GenerateGcRootFieldLoad( 5947 cls, 5948 out_loc, 5949 Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()), 5950 /* fixup_label= */ nullptr, 5951 read_barrier_option); 5952 break; 5953 } 5954 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: 5955 DCHECK(codegen_->GetCompilerOptions().IsBootImage()); 5956 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); 5957 __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false)); 5958 codegen_->RecordBootImageTypePatch(cls); 5959 break; 5960 case HLoadClass::LoadKind::kBootImageRelRo: { 5961 DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); 5962 __ movl(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false)); 5963 codegen_->RecordBootImageRelRoPatch(codegen_->GetBootImageOffset(cls)); 5964 break; 5965 } 5966 case HLoadClass::LoadKind::kBssEntry: { 5967 Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, 5968 /* no_rip= */ false); 5969 Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls); 5970 // /* GcRoot<mirror::Class> */ out = *address /* PC-relative */ 5971 GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option); 5972 generate_null_check = true; 5973 break; 5974 } 5975 case HLoadClass::LoadKind::kJitBootImageAddress: { 5976 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); 5977 uint32_t address = 
reinterpret_cast32<uint32_t>(cls->GetClass().Get()); 5978 DCHECK_NE(address, 0u); 5979 __ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended. 5980 break; 5981 } 5982 case HLoadClass::LoadKind::kJitTableAddress: { 5983 Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, 5984 /* no_rip= */ true); 5985 Label* fixup_label = 5986 codegen_->NewJitRootClassPatch(cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass()); 5987 // /* GcRoot<mirror::Class> */ out = *address 5988 GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option); 5989 break; 5990 } 5991 default: 5992 LOG(FATAL) << "Unexpected load kind: " << cls->GetLoadKind(); 5993 UNREACHABLE(); 5994 } 5995 5996 if (generate_null_check || cls->MustGenerateClinitCheck()) { 5997 DCHECK(cls->CanCallRuntime()); 5998 SlowPathCode* slow_path = 5999 new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64(cls, cls); 6000 codegen_->AddSlowPath(slow_path); 6001 if (generate_null_check) { 6002 __ testl(out, out); 6003 __ j(kEqual, slow_path->GetEntryLabel()); 6004 } 6005 if (cls->MustGenerateClinitCheck()) { 6006 GenerateClassInitializationCheck(slow_path, out); 6007 } else { 6008 __ Bind(slow_path->GetExitLabel()); 6009 } 6010 } 6011 } 6012 6013 void LocationsBuilderX86_64::VisitClinitCheck(HClinitCheck* check) { 6014 LocationSummary* locations = 6015 new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath); 6016 locations->SetInAt(0, Location::RequiresRegister()); 6017 if (check->HasUses()) { 6018 locations->SetOut(Location::SameAsFirstInput()); 6019 } 6020 // Rely on the type initialization to save everything we need. 6021 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); 6022 } 6023 6024 void LocationsBuilderX86_64::VisitLoadMethodHandle(HLoadMethodHandle* load) { 6025 // Custom calling convention: RAX serves as both input and output. 6026 Location location = Location::RegisterLocation(RAX); 6027 CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location); 6028 } 6029 6030 void InstructionCodeGeneratorX86_64::VisitLoadMethodHandle(HLoadMethodHandle* load) { 6031 codegen_->GenerateLoadMethodHandleRuntimeCall(load); 6032 } 6033 6034 void LocationsBuilderX86_64::VisitLoadMethodType(HLoadMethodType* load) { 6035 // Custom calling convention: RAX serves as both input and output. 6036 Location location = Location::RegisterLocation(RAX); 6037 CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location); 6038 } 6039 6040 void InstructionCodeGeneratorX86_64::VisitLoadMethodType(HLoadMethodType* load) { 6041 codegen_->GenerateLoadMethodTypeRuntimeCall(load); 6042 } 6043 6044 void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) { 6045 // We assume the class to not be null. 
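// (The check emitted below reduces to a single byte compare of the packed class status against the shifted ClassStatus::kInitialized value, branching to the slow path while the class is not fully initialized; see GenerateClassInitializationCheck above.)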
6046 SlowPathCode* slow_path = 6047 new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64(check->GetLoadClass(), check); 6048 codegen_->AddSlowPath(slow_path); 6049 GenerateClassInitializationCheck(slow_path, 6050 check->GetLocations()->InAt(0).AsRegister<CpuRegister>()); 6051 } 6052 6053 HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind( 6054 HLoadString::LoadKind desired_string_load_kind) { 6055 switch (desired_string_load_kind) { 6056 case HLoadString::LoadKind::kBootImageLinkTimePcRelative: 6057 case HLoadString::LoadKind::kBootImageRelRo: 6058 case HLoadString::LoadKind::kBssEntry: 6059 DCHECK(!Runtime::Current()->UseJitCompilation()); 6060 break; 6061 case HLoadString::LoadKind::kJitBootImageAddress: 6062 case HLoadString::LoadKind::kJitTableAddress: 6063 DCHECK(Runtime::Current()->UseJitCompilation()); 6064 break; 6065 case HLoadString::LoadKind::kRuntimeCall: 6066 break; 6067 } 6068 return desired_string_load_kind; 6069 } 6070 6071 void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) { 6072 LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load); 6073 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind); 6074 if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) { 6075 locations->SetOut(Location::RegisterLocation(RAX)); 6076 } else { 6077 locations->SetOut(Location::RequiresRegister()); 6078 if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) { 6079 if (!kUseReadBarrier || kUseBakerReadBarrier) { 6080 // Rely on the pResolveString to save everything. 6081 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); 6082 } else { 6083 // For non-Baker read barrier we have a temp-clobbering call. 6084 } 6085 } 6086 } 6087 } 6088 6089 Label* CodeGeneratorX86_64::NewJitRootStringPatch(const DexFile& dex_file, 6090 dex::StringIndex string_index, 6091 Handle<mirror::String> handle) { 6092 ReserveJitStringRoot(StringReference(&dex_file, string_index), handle); 6093 // Add a patch entry and return the label. 6094 jit_string_patches_.emplace_back(&dex_file, string_index.index_); 6095 PatchInfo<Label>* info = &jit_string_patches_.back(); 6096 return &info->label; 6097 } 6098 6099 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not 6100 // move. 
6101 void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS { 6102 LocationSummary* locations = load->GetLocations(); 6103 Location out_loc = locations->Out(); 6104 CpuRegister out = out_loc.AsRegister<CpuRegister>(); 6105 6106 switch (load->GetLoadKind()) { 6107 case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { 6108 DCHECK(codegen_->GetCompilerOptions().IsBootImage()); 6109 __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false)); 6110 codegen_->RecordBootImageStringPatch(load); 6111 return; 6112 } 6113 case HLoadString::LoadKind::kBootImageRelRo: { 6114 DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); 6115 __ movl(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false)); 6116 codegen_->RecordBootImageRelRoPatch(codegen_->GetBootImageOffset(load)); 6117 return; 6118 } 6119 case HLoadString::LoadKind::kBssEntry: { 6120 Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, 6121 /* no_rip= */ false); 6122 Label* fixup_label = codegen_->NewStringBssEntryPatch(load); 6123 // /* GcRoot<mirror::String> */ out = *address /* PC-relative */ 6124 GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption); 6125 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86_64(load); 6126 codegen_->AddSlowPath(slow_path); 6127 __ testl(out, out); 6128 __ j(kEqual, slow_path->GetEntryLabel()); 6129 __ Bind(slow_path->GetExitLabel()); 6130 return; 6131 } 6132 case HLoadString::LoadKind::kJitBootImageAddress: { 6133 uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get()); 6134 DCHECK_NE(address, 0u); 6135 __ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended. 6136 return; 6137 } 6138 case HLoadString::LoadKind::kJitTableAddress: { 6139 Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, 6140 /* no_rip= */ true); 6141 Label* fixup_label = codegen_->NewJitRootStringPatch( 6142 load->GetDexFile(), load->GetStringIndex(), load->GetString()); 6143 // /* GcRoot<mirror::String> */ out = *address 6144 GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption); 6145 return; 6146 } 6147 default: 6148 break; 6149 } 6150 6151 // TODO: Re-add the compiler code to do string dex cache lookup again. 6152 // Custom calling convention: RAX serves as both input and output.
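// Roughly: movl RAX, <string index>; call pResolveString; the resolved String is returned in RAX.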
6153 __ movl(CpuRegister(RAX), Immediate(load->GetStringIndex().index_)); 6154 codegen_->InvokeRuntime(kQuickResolveString, 6155 load, 6156 load->GetDexPc()); 6157 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); 6158 } 6159 6160 static Address GetExceptionTlsAddress() { 6161 return Address::Absolute(Thread::ExceptionOffset<kX86_64PointerSize>().Int32Value(), 6162 /* no_rip= */ true); 6163 } 6164 6165 void LocationsBuilderX86_64::VisitLoadException(HLoadException* load) { 6166 LocationSummary* locations = 6167 new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall); 6168 locations->SetOut(Location::RequiresRegister()); 6169 } 6170 6171 void InstructionCodeGeneratorX86_64::VisitLoadException(HLoadException* load) { 6172 __ gs()->movl(load->GetLocations()->Out().AsRegister<CpuRegister>(), GetExceptionTlsAddress()); 6173 } 6174 6175 void LocationsBuilderX86_64::VisitClearException(HClearException* clear) { 6176 new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall); 6177 } 6178 6179 void InstructionCodeGeneratorX86_64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) { 6180 __ gs()->movl(GetExceptionTlsAddress(), Immediate(0)); 6181 } 6182 6183 void LocationsBuilderX86_64::VisitThrow(HThrow* instruction) { 6184 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( 6185 instruction, LocationSummary::kCallOnMainOnly); 6186 InvokeRuntimeCallingConvention calling_convention; 6187 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); 6188 } 6189 6190 void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) { 6191 codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc()); 6192 CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); 6193 } 6194 6195 // Temp is used for read barrier. 6196 static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) { 6197 if (kEmitCompilerReadBarrier && 6198 !kUseBakerReadBarrier && 6199 (type_check_kind == TypeCheckKind::kAbstractClassCheck || 6200 type_check_kind == TypeCheckKind::kClassHierarchyCheck || 6201 type_check_kind == TypeCheckKind::kArrayObjectCheck)) { 6202 return 1; 6203 } 6204 return 0; 6205 } 6206 6207 // Interface case has 2 temps, one for holding the number of interfaces, one for the current 6208 // interface pointer, the current interface is compared in memory. 6209 // The other checks have one temp for loading the object's class. 6210 static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) { 6211 if (type_check_kind == TypeCheckKind::kInterfaceCheck) { 6212 return 2; 6213 } 6214 return 1 + NumberOfInstanceOfTemps(type_check_kind); 6215 } 6216 6217 void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { 6218 LocationSummary::CallKind call_kind = LocationSummary::kNoCall; 6219 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); 6220 bool baker_read_barrier_slow_path = false; 6221 switch (type_check_kind) { 6222 case TypeCheckKind::kExactCheck: 6223 case TypeCheckKind::kAbstractClassCheck: 6224 case TypeCheckKind::kClassHierarchyCheck: 6225 case TypeCheckKind::kArrayObjectCheck: { 6226 bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction); 6227 call_kind = needs_read_barrier ? 
LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; 6228 baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier; 6229 break; 6230 } 6231 case TypeCheckKind::kArrayCheck: 6232 case TypeCheckKind::kUnresolvedCheck: 6233 case TypeCheckKind::kInterfaceCheck: 6234 call_kind = LocationSummary::kCallOnSlowPath; 6235 break; 6236 case TypeCheckKind::kBitstringCheck: 6237 break; 6238 } 6239 6240 LocationSummary* locations = 6241 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); 6242 if (baker_read_barrier_slow_path) { 6243 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 6244 } 6245 locations->SetInAt(0, Location::RequiresRegister()); 6246 if (type_check_kind == TypeCheckKind::kBitstringCheck) { 6247 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); 6248 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); 6249 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); 6250 } else { 6251 locations->SetInAt(1, Location::Any()); 6252 } 6253 // Note that TypeCheckSlowPathX86_64 uses this "out" register too. 6254 locations->SetOut(Location::RequiresRegister()); 6255 locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind)); 6256 } 6257 6258 void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { 6259 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); 6260 LocationSummary* locations = instruction->GetLocations(); 6261 Location obj_loc = locations->InAt(0); 6262 CpuRegister obj = obj_loc.AsRegister<CpuRegister>(); 6263 Location cls = locations->InAt(1); 6264 Location out_loc = locations->Out(); 6265 CpuRegister out = out_loc.AsRegister<CpuRegister>(); 6266 const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); 6267 DCHECK_LE(num_temps, 1u); 6268 Location maybe_temp_loc = (num_temps >= 1u) ? locations->GetTemp(0) : Location::NoLocation(); 6269 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); 6270 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); 6271 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); 6272 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); 6273 SlowPathCode* slow_path = nullptr; 6274 NearLabel done, zero; 6275 6276 // Return 0 if `obj` is null. 6277 // Avoid null check if we know obj is not null. 6278 if (instruction->MustDoNullCheck()) { 6279 __ testl(obj, obj); 6280 __ j(kEqual, &zero); 6281 } 6282 6283 switch (type_check_kind) { 6284 case TypeCheckKind::kExactCheck: { 6285 ReadBarrierOption read_barrier_option = 6286 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); 6287 // /* HeapReference<Class> */ out = obj->klass_ 6288 GenerateReferenceLoadTwoRegisters(instruction, 6289 out_loc, 6290 obj_loc, 6291 class_offset, 6292 read_barrier_option); 6293 if (cls.IsRegister()) { 6294 __ cmpl(out, cls.AsRegister<CpuRegister>()); 6295 } else { 6296 DCHECK(cls.IsStackSlot()) << cls; 6297 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex())); 6298 } 6299 if (zero.IsLinked()) { 6300 // Classes must be equal for the instanceof to succeed. 6301 __ j(kNotEqual, &zero); 6302 __ movl(out, Immediate(1)); 6303 __ jmp(&done); 6304 } else { 6305 __ setcc(kEqual, out); 6306 // setcc only sets the low byte. 
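// Clear the remaining bits so that `out` holds exactly 0 or 1.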
6307 __ andl(out, Immediate(1)); 6308 } 6309 break; 6310 } 6311 6312 case TypeCheckKind::kAbstractClassCheck: { 6313 ReadBarrierOption read_barrier_option = 6314 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); 6315 // /* HeapReference<Class> */ out = obj->klass_ 6316 GenerateReferenceLoadTwoRegisters(instruction, 6317 out_loc, 6318 obj_loc, 6319 class_offset, 6320 read_barrier_option); 6321 // If the class is abstract, we eagerly fetch the super class of the 6322 // object to avoid doing a comparison we know will fail. 6323 NearLabel loop, success; 6324 __ Bind(&loop); 6325 // /* HeapReference<Class> */ out = out->super_class_ 6326 GenerateReferenceLoadOneRegister(instruction, 6327 out_loc, 6328 super_offset, 6329 maybe_temp_loc, 6330 read_barrier_option); 6331 __ testl(out, out); 6332 // If `out` is null, we use it for the result, and jump to `done`. 6333 __ j(kEqual, &done); 6334 if (cls.IsRegister()) { 6335 __ cmpl(out, cls.AsRegister<CpuRegister>()); 6336 } else { 6337 DCHECK(cls.IsStackSlot()) << cls; 6338 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex())); 6339 } 6340 __ j(kNotEqual, &loop); 6341 __ movl(out, Immediate(1)); 6342 if (zero.IsLinked()) { 6343 __ jmp(&done); 6344 } 6345 break; 6346 } 6347 6348 case TypeCheckKind::kClassHierarchyCheck: { 6349 ReadBarrierOption read_barrier_option = 6350 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); 6351 // /* HeapReference<Class> */ out = obj->klass_ 6352 GenerateReferenceLoadTwoRegisters(instruction, 6353 out_loc, 6354 obj_loc, 6355 class_offset, 6356 read_barrier_option); 6357 // Walk over the class hierarchy to find a match. 6358 NearLabel loop, success; 6359 __ Bind(&loop); 6360 if (cls.IsRegister()) { 6361 __ cmpl(out, cls.AsRegister<CpuRegister>()); 6362 } else { 6363 DCHECK(cls.IsStackSlot()) << cls; 6364 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex())); 6365 } 6366 __ j(kEqual, &success); 6367 // /* HeapReference<Class> */ out = out->super_class_ 6368 GenerateReferenceLoadOneRegister(instruction, 6369 out_loc, 6370 super_offset, 6371 maybe_temp_loc, 6372 read_barrier_option); 6373 __ testl(out, out); 6374 __ j(kNotEqual, &loop); 6375 // If `out` is null, we use it for the result, and jump to `done`. 6376 __ jmp(&done); 6377 __ Bind(&success); 6378 __ movl(out, Immediate(1)); 6379 if (zero.IsLinked()) { 6380 __ jmp(&done); 6381 } 6382 break; 6383 } 6384 6385 case TypeCheckKind::kArrayObjectCheck: { 6386 ReadBarrierOption read_barrier_option = 6387 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); 6388 // /* HeapReference<Class> */ out = obj->klass_ 6389 GenerateReferenceLoadTwoRegisters(instruction, 6390 out_loc, 6391 obj_loc, 6392 class_offset, 6393 read_barrier_option); 6394 // Do an exact check. 6395 NearLabel exact_check; 6396 if (cls.IsRegister()) { 6397 __ cmpl(out, cls.AsRegister<CpuRegister>()); 6398 } else { 6399 DCHECK(cls.IsStackSlot()) << cls; 6400 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex())); 6401 } 6402 __ j(kEqual, &exact_check); 6403 // Otherwise, we need to check that the object's class is a non-primitive array. 6404 // /* HeapReference<Class> */ out = out->component_type_ 6405 GenerateReferenceLoadOneRegister(instruction, 6406 out_loc, 6407 component_offset, 6408 maybe_temp_loc, 6409 read_barrier_option); 6410 __ testl(out, out); 6411 // If `out` is null, we use it for the result, and jump to `done`. 
6412 __ j(kEqual, &done); 6413 __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot)); 6414 __ j(kNotEqual, &zero); 6415 __ Bind(&exact_check); 6416 __ movl(out, Immediate(1)); 6417 __ jmp(&done); 6418 break; 6419 } 6420 6421 case TypeCheckKind::kArrayCheck: { 6422 // No read barrier since the slow path will retry upon failure. 6423 // /* HeapReference<Class> */ out = obj->klass_ 6424 GenerateReferenceLoadTwoRegisters(instruction, 6425 out_loc, 6426 obj_loc, 6427 class_offset, 6428 kWithoutReadBarrier); 6429 if (cls.IsRegister()) { 6430 __ cmpl(out, cls.AsRegister<CpuRegister>()); 6431 } else { 6432 DCHECK(cls.IsStackSlot()) << cls; 6433 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex())); 6434 } 6435 DCHECK(locations->OnlyCallsOnSlowPath()); 6436 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64( 6437 instruction, /* is_fatal= */ false); 6438 codegen_->AddSlowPath(slow_path); 6439 __ j(kNotEqual, slow_path->GetEntryLabel()); 6440 __ movl(out, Immediate(1)); 6441 if (zero.IsLinked()) { 6442 __ jmp(&done); 6443 } 6444 break; 6445 } 6446 6447 case TypeCheckKind::kUnresolvedCheck: 6448 case TypeCheckKind::kInterfaceCheck: { 6449 // Note that we indeed only call on slow path, but we always go 6450 // into the slow path for the unresolved and interface check 6451 // cases. 6452 // 6453 // We cannot directly call the InstanceofNonTrivial runtime 6454 // entry point without resorting to a type checking slow path 6455 // here (i.e. by calling InvokeRuntime directly), as it would 6456 // require to assign fixed registers for the inputs of this 6457 // HInstanceOf instruction (following the runtime calling 6458 // convention), which might be cluttered by the potential first 6459 // read barrier emission at the beginning of this method. 6460 // 6461 // TODO: Introduce a new runtime entry point taking the object 6462 // to test (instead of its class) as argument, and let it deal 6463 // with the read barrier issues. This will let us refactor this 6464 // case of the `switch` code as it was previously (with a direct 6465 // call to the runtime not using a type checking slow path). 6466 // This should also be beneficial for the other cases above. 6467 DCHECK(locations->OnlyCallsOnSlowPath()); 6468 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64( 6469 instruction, /* is_fatal= */ false); 6470 codegen_->AddSlowPath(slow_path); 6471 __ jmp(slow_path->GetEntryLabel()); 6472 if (zero.IsLinked()) { 6473 __ jmp(&done); 6474 } 6475 break; 6476 } 6477 6478 case TypeCheckKind::kBitstringCheck: { 6479 // /* HeapReference<Class> */ temp = obj->klass_ 6480 GenerateReferenceLoadTwoRegisters(instruction, 6481 out_loc, 6482 obj_loc, 6483 class_offset, 6484 kWithoutReadBarrier); 6485 6486 GenerateBitstringTypeCheckCompare(instruction, out); 6487 if (zero.IsLinked()) { 6488 __ j(kNotEqual, &zero); 6489 __ movl(out, Immediate(1)); 6490 __ jmp(&done); 6491 } else { 6492 __ setcc(kEqual, out); 6493 // setcc only sets the low byte. 
6494 __ andl(out, Immediate(1)); 6495 } 6496 break; 6497 } 6498 } 6499 6500 if (zero.IsLinked()) { 6501 __ Bind(&zero); 6502 __ xorl(out, out); 6503 } 6504 6505 if (done.IsLinked()) { 6506 __ Bind(&done); 6507 } 6508 6509 if (slow_path != nullptr) { 6510 __ Bind(slow_path->GetExitLabel()); 6511 } 6512 } 6513 6514 void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) { 6515 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); 6516 LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction); 6517 LocationSummary* locations = 6518 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); 6519 locations->SetInAt(0, Location::RequiresRegister()); 6520 if (type_check_kind == TypeCheckKind::kInterfaceCheck) { 6521 // Require a register for the interface check since there is a loop that compares the class to 6522 // a memory address. 6523 locations->SetInAt(1, Location::RequiresRegister()); 6524 } else if (type_check_kind == TypeCheckKind::kBitstringCheck) { 6525 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); 6526 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); 6527 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); 6528 } else { 6529 locations->SetInAt(1, Location::Any()); 6530 } 6531 // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathX86_64. 6532 locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); 6533 } 6534 6535 void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { 6536 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); 6537 LocationSummary* locations = instruction->GetLocations(); 6538 Location obj_loc = locations->InAt(0); 6539 CpuRegister obj = obj_loc.AsRegister<CpuRegister>(); 6540 Location cls = locations->InAt(1); 6541 Location temp_loc = locations->GetTemp(0); 6542 CpuRegister temp = temp_loc.AsRegister<CpuRegister>(); 6543 const size_t num_temps = NumberOfCheckCastTemps(type_check_kind); 6544 DCHECK_GE(num_temps, 1u); 6545 DCHECK_LE(num_temps, 2u); 6546 Location maybe_temp2_loc = (num_temps >= 2u) ? locations->GetTemp(1) : Location::NoLocation(); 6547 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); 6548 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); 6549 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); 6550 const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); 6551 const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value(); 6552 const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value(); 6553 const uint32_t object_array_data_offset = 6554 mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); 6555 6556 bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction); 6557 SlowPathCode* type_check_slow_path = 6558 new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64( 6559 instruction, is_type_check_slow_path_fatal); 6560 codegen_->AddSlowPath(type_check_slow_path); 6561 6562 6563 NearLabel done; 6564 // Avoid null check if we know obj is not null.
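// A null reference trivially passes the check, so in that case we simply jump to `done`.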
6565 if (instruction->MustDoNullCheck()) { 6566 __ testl(obj, obj); 6567 __ j(kEqual, &done); 6568 } 6569 6570 switch (type_check_kind) { 6571 case TypeCheckKind::kExactCheck: 6572 case TypeCheckKind::kArrayCheck: { 6573 // /* HeapReference<Class> */ temp = obj->klass_ 6574 GenerateReferenceLoadTwoRegisters(instruction, 6575 temp_loc, 6576 obj_loc, 6577 class_offset, 6578 kWithoutReadBarrier); 6579 if (cls.IsRegister()) { 6580 __ cmpl(temp, cls.AsRegister<CpuRegister>()); 6581 } else { 6582 DCHECK(cls.IsStackSlot()) << cls; 6583 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex())); 6584 } 6585 // Jump to slow path for throwing the exception or doing a 6586 // more involved array check. 6587 __ j(kNotEqual, type_check_slow_path->GetEntryLabel()); 6588 break; 6589 } 6590 6591 case TypeCheckKind::kAbstractClassCheck: { 6592 // /* HeapReference<Class> */ temp = obj->klass_ 6593 GenerateReferenceLoadTwoRegisters(instruction, 6594 temp_loc, 6595 obj_loc, 6596 class_offset, 6597 kWithoutReadBarrier); 6598 // If the class is abstract, we eagerly fetch the super class of the 6599 // object to avoid doing a comparison we know will fail. 6600 NearLabel loop; 6601 __ Bind(&loop); 6602 // /* HeapReference<Class> */ temp = temp->super_class_ 6603 GenerateReferenceLoadOneRegister(instruction, 6604 temp_loc, 6605 super_offset, 6606 maybe_temp2_loc, 6607 kWithoutReadBarrier); 6608 6609 // If the class reference currently in `temp` is null, jump to the slow path to throw the 6610 // exception. 6611 __ testl(temp, temp); 6612 // Otherwise, compare the classes. 6613 __ j(kZero, type_check_slow_path->GetEntryLabel()); 6614 if (cls.IsRegister()) { 6615 __ cmpl(temp, cls.AsRegister<CpuRegister>()); 6616 } else { 6617 DCHECK(cls.IsStackSlot()) << cls; 6618 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex())); 6619 } 6620 __ j(kNotEqual, &loop); 6621 break; 6622 } 6623 6624 case TypeCheckKind::kClassHierarchyCheck: { 6625 // /* HeapReference<Class> */ temp = obj->klass_ 6626 GenerateReferenceLoadTwoRegisters(instruction, 6627 temp_loc, 6628 obj_loc, 6629 class_offset, 6630 kWithoutReadBarrier); 6631 // Walk over the class hierarchy to find a match. 6632 NearLabel loop; 6633 __ Bind(&loop); 6634 if (cls.IsRegister()) { 6635 __ cmpl(temp, cls.AsRegister<CpuRegister>()); 6636 } else { 6637 DCHECK(cls.IsStackSlot()) << cls; 6638 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex())); 6639 } 6640 __ j(kEqual, &done); 6641 6642 // /* HeapReference<Class> */ temp = temp->super_class_ 6643 GenerateReferenceLoadOneRegister(instruction, 6644 temp_loc, 6645 super_offset, 6646 maybe_temp2_loc, 6647 kWithoutReadBarrier); 6648 6649 // If the class reference currently in `temp` is not null, jump 6650 // back at the beginning of the loop. 6651 __ testl(temp, temp); 6652 __ j(kNotZero, &loop); 6653 // Otherwise, jump to the slow path to throw the exception. 6654 __ jmp(type_check_slow_path->GetEntryLabel()); 6655 break; 6656 } 6657 6658 case TypeCheckKind::kArrayObjectCheck: { 6659 // /* HeapReference<Class> */ temp = obj->klass_ 6660 GenerateReferenceLoadTwoRegisters(instruction, 6661 temp_loc, 6662 obj_loc, 6663 class_offset, 6664 kWithoutReadBarrier); 6665 // Do an exact check. 
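// If the object's class equals `cls`, the check succeeds and we jump straight to `done`;
// otherwise fall through to the component type check below.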
6666 NearLabel check_non_primitive_component_type; 6667 if (cls.IsRegister()) { 6668 __ cmpl(temp, cls.AsRegister<CpuRegister>()); 6669 } else { 6670 DCHECK(cls.IsStackSlot()) << cls; 6671 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex())); 6672 } 6673 __ j(kEqual, &done); 6674 6675 // Otherwise, we need to check that the object's class is a non-primitive array. 6676 // /* HeapReference<Class> */ temp = temp->component_type_ 6677 GenerateReferenceLoadOneRegister(instruction, 6678 temp_loc, 6679 component_offset, 6680 maybe_temp2_loc, 6681 kWithoutReadBarrier); 6682 6683 // If the component type is not null (i.e. the object is indeed 6684 // an array), jump to label `check_non_primitive_component_type` 6685 // to further check that this component type is not a primitive 6686 // type. 6687 __ testl(temp, temp); 6688 // Otherwise, jump to the slow path to throw the exception. 6689 __ j(kZero, type_check_slow_path->GetEntryLabel()); 6690 __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot)); 6691 __ j(kNotEqual, type_check_slow_path->GetEntryLabel()); 6692 break; 6693 } 6694 6695 case TypeCheckKind::kUnresolvedCheck: { 6696 // We always go into the type check slow path for the unresolved case. 6697 // 6698 // We cannot directly call the CheckCast runtime entry point 6699 // without resorting to a type checking slow path here (i.e. by 6700 // calling InvokeRuntime directly), as it would require to 6701 // assign fixed registers for the inputs of this HInstanceOf 6702 // instruction (following the runtime calling convention), which 6703 // might be cluttered by the potential first read barrier 6704 // emission at the beginning of this method. 6705 __ jmp(type_check_slow_path->GetEntryLabel()); 6706 break; 6707 } 6708 6709 case TypeCheckKind::kInterfaceCheck: { 6710 // Fast path for the interface check. Try to avoid read barriers to improve the fast path. 6711 // We can not get false positives by doing this. 6712 // /* HeapReference<Class> */ temp = obj->klass_ 6713 GenerateReferenceLoadTwoRegisters(instruction, 6714 temp_loc, 6715 obj_loc, 6716 class_offset, 6717 kWithoutReadBarrier); 6718 6719 // /* HeapReference<Class> */ temp = temp->iftable_ 6720 GenerateReferenceLoadTwoRegisters(instruction, 6721 temp_loc, 6722 temp_loc, 6723 iftable_offset, 6724 kWithoutReadBarrier); 6725 // Iftable is never null. 6726 __ movl(maybe_temp2_loc.AsRegister<CpuRegister>(), Address(temp, array_length_offset)); 6727 // Maybe poison the `cls` for direct comparison with memory. 6728 __ MaybePoisonHeapReference(cls.AsRegister<CpuRegister>()); 6729 // Loop through the iftable and check if any class matches. 6730 NearLabel start_loop; 6731 __ Bind(&start_loop); 6732 // Need to subtract first to handle the empty array case. 6733 __ subl(maybe_temp2_loc.AsRegister<CpuRegister>(), Immediate(2)); 6734 __ j(kNegative, type_check_slow_path->GetEntryLabel()); 6735 // Go to next interface if the classes do not match. 6736 __ cmpl(cls.AsRegister<CpuRegister>(), 6737 CodeGeneratorX86_64::ArrayAddress(temp, 6738 maybe_temp2_loc, 6739 TIMES_4, 6740 object_array_data_offset)); 6741 __ j(kNotEqual, &start_loop); // Return if same class. 6742 // If `cls` was poisoned above, unpoison it. 
6743 __ MaybeUnpoisonHeapReference(cls.AsRegister<CpuRegister>()); 6744 break; 6745 } 6746 6747 case TypeCheckKind::kBitstringCheck: { 6748 // /* HeapReference<Class> */ temp = obj->klass_ 6749 GenerateReferenceLoadTwoRegisters(instruction, 6750 temp_loc, 6751 obj_loc, 6752 class_offset, 6753 kWithoutReadBarrier); 6754 6755 GenerateBitstringTypeCheckCompare(instruction, temp); 6756 __ j(kNotEqual, type_check_slow_path->GetEntryLabel()); 6757 break; 6758 } 6759 } 6760 6761 if (done.IsLinked()) { 6762 __ Bind(&done); 6763 } 6764 6765 __ Bind(type_check_slow_path->GetExitLabel()); 6766 } 6767 6768 void LocationsBuilderX86_64::VisitMonitorOperation(HMonitorOperation* instruction) { 6769 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( 6770 instruction, LocationSummary::kCallOnMainOnly); 6771 InvokeRuntimeCallingConvention calling_convention; 6772 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); 6773 } 6774 6775 void InstructionCodeGeneratorX86_64::VisitMonitorOperation(HMonitorOperation* instruction) { 6776 codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject, 6777 instruction, 6778 instruction->GetDexPc()); 6779 if (instruction->IsEnter()) { 6780 CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); 6781 } else { 6782 CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>(); 6783 } 6784 } 6785 6786 void LocationsBuilderX86_64::VisitX86AndNot(HX86AndNot* instruction) { 6787 DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2()); 6788 DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType(); 6789 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); 6790 locations->SetInAt(0, Location::RequiresRegister()); 6791 // There is no immediate variant of negated bitwise and in X86. 
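// The BMI `andn` instruction emitted for this node combines one operand with the complement of
// the other and has no immediate form, hence a register is required for the second input too.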
6792 locations->SetInAt(1, Location::RequiresRegister()); 6793 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 6794 } 6795 6796 void LocationsBuilderX86_64::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) { 6797 DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2()); 6798 DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType(); 6799 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); 6800 locations->SetInAt(0, Location::RequiresRegister()); 6801 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 6802 } 6803 6804 void InstructionCodeGeneratorX86_64::VisitX86AndNot(HX86AndNot* instruction) { 6805 LocationSummary* locations = instruction->GetLocations(); 6806 Location first = locations->InAt(0); 6807 Location second = locations->InAt(1); 6808 Location dest = locations->Out(); 6809 __ andn(dest.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); 6810 } 6811 6812 void InstructionCodeGeneratorX86_64::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) { 6813 LocationSummary* locations = instruction->GetLocations(); 6814 Location src = locations->InAt(0); 6815 Location dest = locations->Out(); 6816 switch (instruction->GetOpKind()) { 6817 case HInstruction::kAnd: 6818 __ blsr(dest.AsRegister<CpuRegister>(), src.AsRegister<CpuRegister>()); 6819 break; 6820 case HInstruction::kXor: 6821 __ blsmsk(dest.AsRegister<CpuRegister>(), src.AsRegister<CpuRegister>()); 6822 break; 6823 default: 6824 LOG(FATAL) << "Unreachable"; 6825 } 6826 } 6827 6828 void LocationsBuilderX86_64::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); } 6829 void LocationsBuilderX86_64::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); } 6830 void LocationsBuilderX86_64::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); } 6831 6832 void LocationsBuilderX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) { 6833 LocationSummary* locations = 6834 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); 6835 DCHECK(instruction->GetResultType() == DataType::Type::kInt32 6836 || instruction->GetResultType() == DataType::Type::kInt64); 6837 locations->SetInAt(0, Location::RequiresRegister()); 6838 locations->SetInAt(1, Location::Any()); 6839 locations->SetOut(Location::SameAsFirstInput()); 6840 } 6841 6842 void InstructionCodeGeneratorX86_64::VisitAnd(HAnd* instruction) { 6843 HandleBitwiseOperation(instruction); 6844 } 6845 6846 void InstructionCodeGeneratorX86_64::VisitOr(HOr* instruction) { 6847 HandleBitwiseOperation(instruction); 6848 } 6849 6850 void InstructionCodeGeneratorX86_64::VisitXor(HXor* instruction) { 6851 HandleBitwiseOperation(instruction); 6852 } 6853 6854 void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) { 6855 LocationSummary* locations = instruction->GetLocations(); 6856 Location first = locations->InAt(0); 6857 Location second = locations->InAt(1); 6858 DCHECK(first.Equals(locations->Out())); 6859 6860 if (instruction->GetResultType() == DataType::Type::kInt32) { 6861 if (second.IsRegister()) { 6862 if (instruction->IsAnd()) { 6863 __ andl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); 6864 } else if (instruction->IsOr()) { 6865 __ orl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); 6866 } else { 6867 DCHECK(instruction->IsXor()); 6868 __ 
xorl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); 6869 } 6870 } else if (second.IsConstant()) { 6871 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue()); 6872 if (instruction->IsAnd()) { 6873 __ andl(first.AsRegister<CpuRegister>(), imm); 6874 } else if (instruction->IsOr()) { 6875 __ orl(first.AsRegister<CpuRegister>(), imm); 6876 } else { 6877 DCHECK(instruction->IsXor()); 6878 __ xorl(first.AsRegister<CpuRegister>(), imm); 6879 } 6880 } else { 6881 Address address(CpuRegister(RSP), second.GetStackIndex()); 6882 if (instruction->IsAnd()) { 6883 __ andl(first.AsRegister<CpuRegister>(), address); 6884 } else if (instruction->IsOr()) { 6885 __ orl(first.AsRegister<CpuRegister>(), address); 6886 } else { 6887 DCHECK(instruction->IsXor()); 6888 __ xorl(first.AsRegister<CpuRegister>(), address); 6889 } 6890 } 6891 } else { 6892 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64); 6893 CpuRegister first_reg = first.AsRegister<CpuRegister>(); 6894 bool second_is_constant = false; 6895 int64_t value = 0; 6896 if (second.IsConstant()) { 6897 second_is_constant = true; 6898 value = second.GetConstant()->AsLongConstant()->GetValue(); 6899 } 6900 bool is_int32_value = IsInt<32>(value); 6901 6902 if (instruction->IsAnd()) { 6903 if (second_is_constant) { 6904 if (is_int32_value) { 6905 __ andq(first_reg, Immediate(static_cast<int32_t>(value))); 6906 } else { 6907 __ andq(first_reg, codegen_->LiteralInt64Address(value)); 6908 } 6909 } else if (second.IsDoubleStackSlot()) { 6910 __ andq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex())); 6911 } else { 6912 __ andq(first_reg, second.AsRegister<CpuRegister>()); 6913 } 6914 } else if (instruction->IsOr()) { 6915 if (second_is_constant) { 6916 if (is_int32_value) { 6917 __ orq(first_reg, Immediate(static_cast<int32_t>(value))); 6918 } else { 6919 __ orq(first_reg, codegen_->LiteralInt64Address(value)); 6920 } 6921 } else if (second.IsDoubleStackSlot()) { 6922 __ orq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex())); 6923 } else { 6924 __ orq(first_reg, second.AsRegister<CpuRegister>()); 6925 } 6926 } else { 6927 DCHECK(instruction->IsXor()); 6928 if (second_is_constant) { 6929 if (is_int32_value) { 6930 __ xorq(first_reg, Immediate(static_cast<int32_t>(value))); 6931 } else { 6932 __ xorq(first_reg, codegen_->LiteralInt64Address(value)); 6933 } 6934 } else if (second.IsDoubleStackSlot()) { 6935 __ xorq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex())); 6936 } else { 6937 __ xorq(first_reg, second.AsRegister<CpuRegister>()); 6938 } 6939 } 6940 } 6941 } 6942 6943 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister( 6944 HInstruction* instruction, 6945 Location out, 6946 uint32_t offset, 6947 Location maybe_temp, 6948 ReadBarrierOption read_barrier_option) { 6949 CpuRegister out_reg = out.AsRegister<CpuRegister>(); 6950 if (read_barrier_option == kWithReadBarrier) { 6951 CHECK(kEmitCompilerReadBarrier); 6952 if (kUseBakerReadBarrier) { 6953 // Load with fast path based Baker's read barrier. 6954 // /* HeapReference<Object> */ out = *(out + offset) 6955 codegen_->GenerateFieldLoadWithBakerReadBarrier( 6956 instruction, out, out_reg, offset, /* needs_null_check= */ false); 6957 } else { 6958 // Load with slow path based read barrier. 6959 // Save the value of `out` into `maybe_temp` before overwriting it 6960 // in the following move operation, as we will need it for the 6961 // read barrier below. 
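// The emitted sequence is roughly:
//   maybe_temp = out;            // remember the holder object
//   out = *(out + offset);       // raw reference load
//   out = <read barrier>(out, maybe_temp, offset);  // handled by the slow path below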
6962 DCHECK(maybe_temp.IsRegister()) << maybe_temp; 6963 __ movl(maybe_temp.AsRegister<CpuRegister>(), out_reg); 6964 // /* HeapReference<Object> */ out = *(out + offset) 6965 __ movl(out_reg, Address(out_reg, offset)); 6966 codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset); 6967 } 6968 } else { 6969 // Plain load with no read barrier. 6970 // /* HeapReference<Object> */ out = *(out + offset) 6971 __ movl(out_reg, Address(out_reg, offset)); 6972 __ MaybeUnpoisonHeapReference(out_reg); 6973 } 6974 } 6975 6976 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters( 6977 HInstruction* instruction, 6978 Location out, 6979 Location obj, 6980 uint32_t offset, 6981 ReadBarrierOption read_barrier_option) { 6982 CpuRegister out_reg = out.AsRegister<CpuRegister>(); 6983 CpuRegister obj_reg = obj.AsRegister<CpuRegister>(); 6984 if (read_barrier_option == kWithReadBarrier) { 6985 CHECK(kEmitCompilerReadBarrier); 6986 if (kUseBakerReadBarrier) { 6987 // Load with fast path based Baker's read barrier. 6988 // /* HeapReference<Object> */ out = *(obj + offset) 6989 codegen_->GenerateFieldLoadWithBakerReadBarrier( 6990 instruction, out, obj_reg, offset, /* needs_null_check= */ false); 6991 } else { 6992 // Load with slow path based read barrier. 6993 // /* HeapReference<Object> */ out = *(obj + offset) 6994 __ movl(out_reg, Address(obj_reg, offset)); 6995 codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset); 6996 } 6997 } else { 6998 // Plain load with no read barrier. 6999 // /* HeapReference<Object> */ out = *(obj + offset) 7000 __ movl(out_reg, Address(obj_reg, offset)); 7001 __ MaybeUnpoisonHeapReference(out_reg); 7002 } 7003 } 7004 7005 void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad( 7006 HInstruction* instruction, 7007 Location root, 7008 const Address& address, 7009 Label* fixup_label, 7010 ReadBarrierOption read_barrier_option) { 7011 CpuRegister root_reg = root.AsRegister<CpuRegister>(); 7012 if (read_barrier_option == kWithReadBarrier) { 7013 DCHECK(kEmitCompilerReadBarrier); 7014 if (kUseBakerReadBarrier) { 7015 // Fast path implementation of art::ReadBarrier::BarrierForRoot when 7016 // Baker's read barrier are used: 7017 // 7018 // root = obj.field; 7019 // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() 7020 // if (temp != null) { 7021 // root = temp(root) 7022 // } 7023 7024 // /* GcRoot<mirror::Object> */ root = *address 7025 __ movl(root_reg, address); 7026 if (fixup_label != nullptr) { 7027 __ Bind(fixup_label); 7028 } 7029 static_assert( 7030 sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), 7031 "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " 7032 "have different sizes."); 7033 static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), 7034 "art::mirror::CompressedReference<mirror::Object> and int32_t " 7035 "have different sizes."); 7036 7037 // Slow path marking the GC root `root`. 7038 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathX86_64( 7039 instruction, root, /* unpoison_ref_before_marking= */ false); 7040 codegen_->AddSlowPath(slow_path); 7041 7042 // Test the `Thread::Current()->pReadBarrierMarkReg ## root.reg()` entrypoint. 
7043 const int32_t entry_point_offset = 7044 Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(root.reg()); 7045 __ gs()->cmpl(Address::Absolute(entry_point_offset, /* no_rip= */ true), Immediate(0)); 7046 // The entrypoint is null when the GC is not marking. 7047 __ j(kNotEqual, slow_path->GetEntryLabel()); 7048 __ Bind(slow_path->GetExitLabel()); 7049 } else { 7050 // GC root loaded through a slow path for read barriers other 7051 // than Baker's. 7052 // /* GcRoot<mirror::Object>* */ root = address 7053 __ leaq(root_reg, address); 7054 if (fixup_label != nullptr) { 7055 __ Bind(fixup_label); 7056 } 7057 // /* mirror::Object* */ root = root->Read() 7058 codegen_->GenerateReadBarrierForRootSlow(instruction, root, root); 7059 } 7060 } else { 7061 // Plain GC root load with no read barrier. 7062 // /* GcRoot<mirror::Object> */ root = *address 7063 __ movl(root_reg, address); 7064 if (fixup_label != nullptr) { 7065 __ Bind(fixup_label); 7066 } 7067 // Note that GC roots are not affected by heap poisoning, thus we 7068 // do not have to unpoison `root_reg` here. 7069 } 7070 } 7071 7072 void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, 7073 Location ref, 7074 CpuRegister obj, 7075 uint32_t offset, 7076 bool needs_null_check) { 7077 DCHECK(kEmitCompilerReadBarrier); 7078 DCHECK(kUseBakerReadBarrier); 7079 7080 // /* HeapReference<Object> */ ref = *(obj + offset) 7081 Address src(obj, offset); 7082 GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check); 7083 } 7084 7085 void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, 7086 Location ref, 7087 CpuRegister obj, 7088 uint32_t data_offset, 7089 Location index, 7090 bool needs_null_check) { 7091 DCHECK(kEmitCompilerReadBarrier); 7092 DCHECK(kUseBakerReadBarrier); 7093 7094 static_assert( 7095 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), 7096 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); 7097 // /* HeapReference<Object> */ ref = 7098 // *(obj + data_offset + index * sizeof(HeapReference<Object>)) 7099 Address src = CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset); 7100 GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check); 7101 } 7102 7103 void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, 7104 Location ref, 7105 CpuRegister obj, 7106 const Address& src, 7107 bool needs_null_check, 7108 bool always_update_field, 7109 CpuRegister* temp1, 7110 CpuRegister* temp2) { 7111 DCHECK(kEmitCompilerReadBarrier); 7112 DCHECK(kUseBakerReadBarrier); 7113 7114 // In slow path based read barriers, the read barrier call is 7115 // inserted after the original load. However, in fast path based 7116 // Baker's read barriers, we need to perform the load of 7117 // mirror::Object::monitor_ *before* the original reference load. 7118 // This load-load ordering is required by the read barrier. 7119 // The fast path/slow path (for Baker's algorithm) should look like: 7120 // 7121 // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); 7122 // lfence; // Load fence or artificial data dependency to prevent load-load reordering 7123 // HeapReference<Object> ref = *src; // Original reference load. 7124 // bool is_gray = (rb_state == ReadBarrier::GrayState()); 7125 // if (is_gray) { 7126 // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path. 
7127 // } 7128 // 7129 // Note: the original implementation in ReadBarrier::Barrier is 7130 // slightly more complex as: 7131 // - it implements the load-load fence using a data dependency on 7132 // the high-bits of rb_state, which are expected to be all zeroes 7133 // (we use CodeGeneratorX86_64::GenerateMemoryBarrier instead 7134 // here, which is a no-op thanks to the x86-64 memory model); 7135 // - it performs additional checks that we do not do here for 7136 // performance reasons. 7137 7138 CpuRegister ref_reg = ref.AsRegister<CpuRegister>(); 7139 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); 7140 7141 // Given the numeric representation, it's enough to check the low bit of the rb_state. 7142 static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0"); 7143 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); 7144 constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte; 7145 constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte; 7146 constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position); 7147 7148 // if (rb_state == ReadBarrier::GrayState()) 7149 // ref = ReadBarrier::Mark(ref); 7150 // At this point, just do the "if" and make sure that flags are preserved until the branch. 7151 __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value)); 7152 if (needs_null_check) { 7153 MaybeRecordImplicitNullCheck(instruction); 7154 } 7155 7156 // Load fence to prevent load-load reordering. 7157 // Note that this is a no-op, thanks to the x86-64 memory model. 7158 GenerateMemoryBarrier(MemBarrierKind::kLoadAny); 7159 7160 // The actual reference load. 7161 // /* HeapReference<Object> */ ref = *src 7162 __ movl(ref_reg, src); // Flags are unaffected. 7163 7164 // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch. 7165 // Slow path marking the object `ref` when it is gray. 7166 SlowPathCode* slow_path; 7167 if (always_update_field) { 7168 DCHECK(temp1 != nullptr); 7169 DCHECK(temp2 != nullptr); 7170 slow_path = new (GetScopedAllocator()) ReadBarrierMarkAndUpdateFieldSlowPathX86_64( 7171 instruction, ref, obj, src, /* unpoison_ref_before_marking= */ true, *temp1, *temp2); 7172 } else { 7173 slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathX86_64( 7174 instruction, ref, /* unpoison_ref_before_marking= */ true); 7175 } 7176 AddSlowPath(slow_path); 7177 7178 // We have done the "if" of the gray bit check above, now branch based on the flags. 7179 __ j(kNotZero, slow_path->GetEntryLabel()); 7180 7181 // Object* ref = ref_addr->AsMirrorPtr() 7182 __ MaybeUnpoisonHeapReference(ref_reg); 7183 7184 __ Bind(slow_path->GetExitLabel()); 7185 } 7186 7187 void CodeGeneratorX86_64::GenerateReadBarrierSlow(HInstruction* instruction, 7188 Location out, 7189 Location ref, 7190 Location obj, 7191 uint32_t offset, 7192 Location index) { 7193 DCHECK(kEmitCompilerReadBarrier); 7194 7195 // Insert a slow path based read barrier *after* the reference load. 7196 // 7197 // If heap poisoning is enabled, the unpoisoning of the loaded 7198 // reference will be carried out by the runtime within the slow 7199 // path. 7200 // 7201 // Note that `ref` currently does not get unpoisoned (when heap 7202 // poisoning is enabled), which is alright as the `ref` argument is 7203 // not used by the artReadBarrierSlow entry point. 
7204 // 7205 // TODO: Unpoison `ref` when it is used by artReadBarrierSlow. 7206 SlowPathCode* slow_path = new (GetScopedAllocator()) 7207 ReadBarrierForHeapReferenceSlowPathX86_64(instruction, out, ref, obj, offset, index); 7208 AddSlowPath(slow_path); 7209 7210 __ jmp(slow_path->GetEntryLabel()); 7211 __ Bind(slow_path->GetExitLabel()); 7212 } 7213 7214 void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction, 7215 Location out, 7216 Location ref, 7217 Location obj, 7218 uint32_t offset, 7219 Location index) { 7220 if (kEmitCompilerReadBarrier) { 7221 // Baker's read barriers shall be handled by the fast path 7222 // (CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier). 7223 DCHECK(!kUseBakerReadBarrier); 7224 // If heap poisoning is enabled, unpoisoning will be taken care of 7225 // by the runtime within the slow path. 7226 GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index); 7227 } else if (kPoisonHeapReferences) { 7228 __ UnpoisonHeapReference(out.AsRegister<CpuRegister>()); 7229 } 7230 } 7231 7232 void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instruction, 7233 Location out, 7234 Location root) { 7235 DCHECK(kEmitCompilerReadBarrier); 7236 7237 // Insert a slow path based read barrier *after* the GC root load. 7238 // 7239 // Note that GC roots are not affected by heap poisoning, so we do 7240 // not need to do anything special for this here. 7241 SlowPathCode* slow_path = 7242 new (GetScopedAllocator()) ReadBarrierForRootSlowPathX86_64(instruction, out, root); 7243 AddSlowPath(slow_path); 7244 7245 __ jmp(slow_path->GetEntryLabel()); 7246 __ Bind(slow_path->GetExitLabel()); 7247 } 7248 7249 void LocationsBuilderX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { 7250 // Nothing to do, this should be removed during prepare for register allocator. 7251 LOG(FATAL) << "Unreachable"; 7252 } 7253 7254 void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { 7255 // Nothing to do, this should be removed during prepare for register allocator. 7256 LOG(FATAL) << "Unreachable"; 7257 } 7258 7259 // Simple implementation of packed switch - generate cascaded compare/jumps. 7260 void LocationsBuilderX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) { 7261 LocationSummary* locations = 7262 new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall); 7263 locations->SetInAt(0, Location::RequiresRegister()); 7264 locations->AddTemp(Location::RequiresRegister()); 7265 locations->AddTemp(Location::RequiresRegister()); 7266 } 7267 7268 void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) { 7269 int32_t lower_bound = switch_instr->GetStartValue(); 7270 uint32_t num_entries = switch_instr->GetNumEntries(); 7271 LocationSummary* locations = switch_instr->GetLocations(); 7272 CpuRegister value_reg_in = locations->InAt(0).AsRegister<CpuRegister>(); 7273 CpuRegister temp_reg = locations->GetTemp(0).AsRegister<CpuRegister>(); 7274 CpuRegister base_reg = locations->GetTemp(1).AsRegister<CpuRegister>(); 7275 HBasicBlock* default_block = switch_instr->GetDefaultBlock(); 7276 7277 // Should we generate smaller inline compare/jumps? 7278 if (num_entries <= kPackedSwitchJumpTableThreshold) { 7279 // Figure out the correct compare values and jump conditions. 7280 // Handle the first compare/branch as a special case because it might 7281 // jump to the default case. 
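// For example, with lower_bound == 0 the cascade emitted below looks roughly like:
//   cmpl value, 1 ; jb case_0 ; je case_1
//   cmpl value, 3 ; jb case_2 ; je case_3
//   ...           ; (an odd final entry is handled with a single je)
//   jmp default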
7282 DCHECK_GT(num_entries, 2u); 7283 Condition first_condition; 7284 uint32_t index; 7285 const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors(); 7286 if (lower_bound != 0) { 7287 first_condition = kLess; 7288 __ cmpl(value_reg_in, Immediate(lower_bound)); 7289 __ j(first_condition, codegen_->GetLabelOf(default_block)); 7290 __ j(kEqual, codegen_->GetLabelOf(successors[0])); 7291 7292 index = 1; 7293 } else { 7294 // Handle all the compare/jumps below. 7295 first_condition = kBelow; 7296 index = 0; 7297 } 7298 7299 // Handle the rest of the compare/jumps. 7300 for (; index + 1 < num_entries; index += 2) { 7301 int32_t compare_to_value = lower_bound + index + 1; 7302 __ cmpl(value_reg_in, Immediate(compare_to_value)); 7303 // Jump to successors[index] if value < case_value[index]. 7304 __ j(first_condition, codegen_->GetLabelOf(successors[index])); 7305 // Jump to successors[index + 1] if value == case_value[index + 1]. 7306 __ j(kEqual, codegen_->GetLabelOf(successors[index + 1])); 7307 } 7308 7309 if (index != num_entries) { 7310 // There are an odd number of entries. Handle the last one. 7311 DCHECK_EQ(index + 1, num_entries); 7312 __ cmpl(value_reg_in, Immediate(static_cast<int32_t>(lower_bound + index))); 7313 __ j(kEqual, codegen_->GetLabelOf(successors[index])); 7314 } 7315 7316 // And the default for any other value. 7317 if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) { 7318 __ jmp(codegen_->GetLabelOf(default_block)); 7319 } 7320 return; 7321 } 7322 7323 // Remove the bias, if needed. 7324 Register value_reg_out = value_reg_in.AsRegister(); 7325 if (lower_bound != 0) { 7326 __ leal(temp_reg, Address(value_reg_in, -lower_bound)); 7327 value_reg_out = temp_reg.AsRegister(); 7328 } 7329 CpuRegister value_reg(value_reg_out); 7330 7331 // Is the value in range? 7332 __ cmpl(value_reg, Immediate(num_entries - 1)); 7333 __ j(kAbove, codegen_->GetLabelOf(default_block)); 7334 7335 // We are in the range of the table. 7336 // Load the address of the jump table in the constant area. 7337 __ leaq(base_reg, codegen_->LiteralCaseTable(switch_instr)); 7338 7339 // Load the (signed) offset from the jump table. 7340 __ movsxd(temp_reg, Address(base_reg, value_reg, TIMES_4, 0)); 7341 7342 // Add the offset to the address of the table base. 7343 __ addq(temp_reg, base_reg); 7344 7345 // And jump. 7346 __ jmp(temp_reg); 7347 } 7348 7349 void LocationsBuilderX86_64::VisitIntermediateAddress(HIntermediateAddress* instruction 7350 ATTRIBUTE_UNUSED) { 7351 LOG(FATAL) << "Unreachable"; 7352 } 7353 7354 void InstructionCodeGeneratorX86_64::VisitIntermediateAddress(HIntermediateAddress* instruction 7355 ATTRIBUTE_UNUSED) { 7356 LOG(FATAL) << "Unreachable"; 7357 } 7358 7359 void CodeGeneratorX86_64::Load32BitValue(CpuRegister dest, int32_t value) { 7360 if (value == 0) { 7361 __ xorl(dest, dest); 7362 } else { 7363 __ movl(dest, Immediate(value)); 7364 } 7365 } 7366 7367 void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) { 7368 if (value == 0) { 7369 // Clears upper bits too. 7370 __ xorl(dest, dest); 7371 } else if (IsUint<32>(value)) { 7372 // We can use a 32 bit move, as it will zero-extend and is shorter. 
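// (On x86-64, a 32-bit write such as this movl clears bits 63..32 of the destination register.)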
7373 __ movl(dest, Immediate(static_cast<int32_t>(value))); 7374 } else { 7375 __ movq(dest, Immediate(value)); 7376 } 7377 } 7378 7379 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, int32_t value) { 7380 if (value == 0) { 7381 __ xorps(dest, dest); 7382 } else { 7383 __ movss(dest, LiteralInt32Address(value)); 7384 } 7385 } 7386 7387 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, int64_t value) { 7388 if (value == 0) { 7389 __ xorpd(dest, dest); 7390 } else { 7391 __ movsd(dest, LiteralInt64Address(value)); 7392 } 7393 } 7394 7395 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, float value) { 7396 Load32BitValue(dest, bit_cast<int32_t, float>(value)); 7397 } 7398 7399 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, double value) { 7400 Load64BitValue(dest, bit_cast<int64_t, double>(value)); 7401 } 7402 7403 void CodeGeneratorX86_64::Compare32BitValue(CpuRegister dest, int32_t value) { 7404 if (value == 0) { 7405 __ testl(dest, dest); 7406 } else { 7407 __ cmpl(dest, Immediate(value)); 7408 } 7409 } 7410 7411 void CodeGeneratorX86_64::Compare64BitValue(CpuRegister dest, int64_t value) { 7412 if (IsInt<32>(value)) { 7413 if (value == 0) { 7414 __ testq(dest, dest); 7415 } else { 7416 __ cmpq(dest, Immediate(static_cast<int32_t>(value))); 7417 } 7418 } else { 7419 // Value won't fit in an int. 7420 __ cmpq(dest, LiteralInt64Address(value)); 7421 } 7422 } 7423 7424 void CodeGeneratorX86_64::GenerateIntCompare(Location lhs, Location rhs) { 7425 CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>(); 7426 GenerateIntCompare(lhs_reg, rhs); 7427 } 7428 7429 void CodeGeneratorX86_64::GenerateIntCompare(CpuRegister lhs, Location rhs) { 7430 if (rhs.IsConstant()) { 7431 int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant()); 7432 Compare32BitValue(lhs, value); 7433 } else if (rhs.IsStackSlot()) { 7434 __ cmpl(lhs, Address(CpuRegister(RSP), rhs.GetStackIndex())); 7435 } else { 7436 __ cmpl(lhs, rhs.AsRegister<CpuRegister>()); 7437 } 7438 } 7439 7440 void CodeGeneratorX86_64::GenerateLongCompare(Location lhs, Location rhs) { 7441 CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>(); 7442 if (rhs.IsConstant()) { 7443 int64_t value = rhs.GetConstant()->AsLongConstant()->GetValue(); 7444 Compare64BitValue(lhs_reg, value); 7445 } else if (rhs.IsDoubleStackSlot()) { 7446 __ cmpq(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex())); 7447 } else { 7448 __ cmpq(lhs_reg, rhs.AsRegister<CpuRegister>()); 7449 } 7450 } 7451 7452 Address CodeGeneratorX86_64::ArrayAddress(CpuRegister obj, 7453 Location index, 7454 ScaleFactor scale, 7455 uint32_t data_offset) { 7456 return index.IsConstant() ? 7457 Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) : 7458 Address(obj, index.AsRegister<CpuRegister>(), scale, data_offset); 7459 } 7460 7461 void CodeGeneratorX86_64::Store64BitValueToStack(Location dest, int64_t value) { 7462 DCHECK(dest.IsDoubleStackSlot()); 7463 if (IsInt<32>(value)) { 7464 // Can move directly as an int32 constant. 7465 __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()), 7466 Immediate(static_cast<int32_t>(value))); 7467 } else { 7468 Load64BitValue(CpuRegister(TMP), value); 7469 __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()), CpuRegister(TMP)); 7470 } 7471 } 7472 7473 /** 7474 * Class to handle late fixup of offsets into constant area. 
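 * The constant area is emitted after the generated code (see Finalize below), so the 4-byte
 * RIP-relative displacements referring to it can only be patched once its final position is known.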
 */
class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
 public:
  RIPFixup(CodeGeneratorX86_64& codegen, size_t offset)
      : codegen_(&codegen), offset_into_constant_area_(offset) {}

 protected:
  void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }

  CodeGeneratorX86_64* codegen_;

 private:
  void Process(const MemoryRegion& region, int pos) override {
    // Patch the correct offset for the instruction. We use the address of the
    // 'next' instruction, which is 'pos' (patch the 4 bytes before).
    int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
    int32_t relative_position = constant_offset - pos;

    // Patch in the right value.
    region.StoreUnaligned<int32_t>(pos - 4, relative_position);
  }

  // Location in constant area that the fixup refers to.
  size_t offset_into_constant_area_;
};

/**
 * Class to handle late fixup of offsets to a jump table that will be created in the
 * constant area.
 */
class JumpTableRIPFixup : public RIPFixup {
 public:
  JumpTableRIPFixup(CodeGeneratorX86_64& codegen, HPackedSwitch* switch_instr)
      : RIPFixup(codegen, -1), switch_instr_(switch_instr) {}

  void CreateJumpTable() {
    X86_64Assembler* assembler = codegen_->GetAssembler();

    // Ensure that the reference to the jump table has the correct offset.
    const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
    SetOffset(offset_in_constant_table);

    // Compute the offset from the start of the function to this jump table.
    const int32_t current_table_offset = assembler->CodeSize() + offset_in_constant_table;

    // Populate the jump table with the offsets to each case target.
    int32_t num_entries = switch_instr_->GetNumEntries();
    HBasicBlock* block = switch_instr_->GetBlock();
    const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
    // The value that we want is the target offset - the position of the table.
    for (int32_t i = 0; i < num_entries; i++) {
      HBasicBlock* b = successors[i];
      Label* l = codegen_->GetLabelOf(b);
      DCHECK(l->IsBound());
      int32_t offset_to_block = l->Position() - current_table_offset;
      assembler->AppendInt32(offset_to_block);
    }
  }

 private:
  const HPackedSwitch* switch_instr_;
};

void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) {
  // Generate the constant area if needed.
  X86_64Assembler* assembler = GetAssembler();
  if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
    // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8 byte values.
    assembler->Align(4, 0);
    constant_area_start_ = assembler->CodeSize();

    // Populate any jump tables.
    for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) {
      jump_table->CreateJumpTable();
    }

    // And now add the constant area to the generated code.
    assembler->AddConstantArea();
  }

  // And finish up.
  CodeGenerator::Finalize(allocator);
}

Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) {
  AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddDouble(v));
  return Address::RIP(fixup);
}

Address CodeGeneratorX86_64::LiteralFloatAddress(float v) {
  AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddFloat(v));
  return Address::RIP(fixup);
}

Address CodeGeneratorX86_64::LiteralInt32Address(int32_t v) {
  AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddInt32(v));
  return Address::RIP(fixup);
}

Address CodeGeneratorX86_64::LiteralInt64Address(int64_t v) {
  AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddInt64(v));
  return Address::RIP(fixup);
}

// TODO: trg as memory.
void CodeGeneratorX86_64::MoveFromReturnRegister(Location trg, DataType::Type type) {
  if (!trg.IsValid()) {
    DCHECK_EQ(type, DataType::Type::kVoid);
    return;
  }

  DCHECK_NE(type, DataType::Type::kVoid);

  Location return_loc = InvokeDexCallingConventionVisitorX86_64().GetReturnLocation(type);
  if (trg.Equals(return_loc)) {
    return;
  }

  // Let the parallel move resolver take care of all of this.
  HParallelMove parallel_move(GetGraph()->GetAllocator());
  parallel_move.AddMove(return_loc, trg, type, nullptr);
  GetMoveResolver()->EmitNativeCode(&parallel_move);
}

Address CodeGeneratorX86_64::LiteralCaseTable(HPackedSwitch* switch_instr) {
  // Create a fixup that will be used both to create the jump table and to address it.
  JumpTableRIPFixup* table_fixup =
      new (GetGraph()->GetAllocator()) JumpTableRIPFixup(*this, switch_instr);

  // Remember the fixup so that Finalize() can populate the jump table.
  fixups_to_jump_tables_.push_back(table_fixup);
  return Address::RIP(table_fixup);
}

void CodeGeneratorX86_64::MoveInt64ToAddress(const Address& addr_low,
                                             const Address& addr_high,
                                             int64_t v,
                                             HInstruction* instruction) {
  if (IsInt<32>(v)) {
    int32_t v_32 = v;
    __ movq(addr_low, Immediate(v_32));
    MaybeRecordImplicitNullCheck(instruction);
  } else {
    // The value doesn't fit in a sign-extended 32-bit immediate. Store it in two pieces.
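    // Record the implicit null check only after the first store: if the object is null,
    // that access faults before the second half is written.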
    int32_t low_v = Low32Bits(v);
    int32_t high_v = High32Bits(v);
    __ movl(addr_low, Immediate(low_v));
    MaybeRecordImplicitNullCheck(instruction);
    __ movl(addr_high, Immediate(high_v));
  }
}

void CodeGeneratorX86_64::PatchJitRootUse(uint8_t* code,
                                          const uint8_t* roots_data,
                                          const PatchInfo<Label>& info,
                                          uint64_t index_in_table) const {
  uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
  uintptr_t address =
      reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
  using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t;
  reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
      dchecked_integral_cast<uint32_t>(address);
}

void CodeGeneratorX86_64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
  for (const PatchInfo<Label>& info : jit_string_patches_) {
    StringReference string_reference(info.target_dex_file, dex::StringIndex(info.offset_or_index));
    uint64_t index_in_table = GetJitStringRootIndex(string_reference);
    PatchJitRootUse(code, roots_data, info, index_in_table);
  }

  for (const PatchInfo<Label>& info : jit_class_patches_) {
    TypeReference type_reference(info.target_dex_file, dex::TypeIndex(info.offset_or_index));
    uint64_t index_in_table = GetJitClassRootIndex(type_reference);
    PatchJitRootUse(code, roots_data, info, index_in_table);
  }
}

#undef __

}  // namespace x86_64
}  // namespace art