/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_

#include "arch/instruction_set.h"
#include "arch/instruction_set_features.h"
#include "base/bit_field.h"
#include "driver/compiler_options.h"
#include "globals.h"
#include "locations.h"
#include "memory_region.h"
#include "nodes.h"
#include "stack_map_stream.h"

namespace art {

// Binary encoding of 2^32 for type double.
static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000);
// Binary encoding of 2^31 for type double.
static int64_t constexpr k2Pow31EncodingForDouble = INT64_C(0x41E0000000000000);

// Maximum value for a primitive integer.
static int32_t constexpr kPrimIntMax = 0x7fffffff;
// Maximum value for a primitive long.
static int64_t constexpr kPrimLongMax = 0x7fffffffffffffff;

class Assembler;
class CodeGenerator;
class DexCompilationUnit;
class ParallelMoveResolver;
class SrcMapElem;
template <class Alloc>
class SrcMap;
using DefaultSrcMap = SrcMap<std::allocator<SrcMapElem>>;

class CodeAllocator {
 public:
  CodeAllocator() {}
  virtual ~CodeAllocator() {}

  virtual uint8_t* Allocate(size_t size) = 0;

 private:
  DISALLOW_COPY_AND_ASSIGN(CodeAllocator);
};

struct PcInfo {
  uint32_t dex_pc;
  uintptr_t native_pc;
};

class SlowPathCode : public ArenaObject<kArenaAllocSlowPaths> {
 public:
  SlowPathCode() {
    for (size_t i = 0; i < kMaximumNumberOfExpectedRegisters; ++i) {
      saved_core_stack_offsets_[i] = kRegisterNotSaved;
      saved_fpu_stack_offsets_[i] = kRegisterNotSaved;
    }
  }

  virtual ~SlowPathCode() {}

  virtual void EmitNativeCode(CodeGenerator* codegen) = 0;

  void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
  void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
  void RecordPcInfo(CodeGenerator* codegen, HInstruction* instruction, uint32_t dex_pc);

  bool IsCoreRegisterSaved(int reg) const {
    return saved_core_stack_offsets_[reg] != kRegisterNotSaved;
  }

  bool IsFpuRegisterSaved(int reg) const {
    return saved_fpu_stack_offsets_[reg] != kRegisterNotSaved;
  }

  uint32_t GetStackOffsetOfCoreRegister(int reg) const {
    return saved_core_stack_offsets_[reg];
  }

  uint32_t GetStackOffsetOfFpuRegister(int reg) const {
    return saved_fpu_stack_offsets_[reg];
  }

 private:
  static constexpr size_t kMaximumNumberOfExpectedRegisters = 32;
  static constexpr uint32_t kRegisterNotSaved = -1;
  uint32_t saved_core_stack_offsets_[kMaximumNumberOfExpectedRegisters];
  uint32_t saved_fpu_stack_offsets_[kMaximumNumberOfExpectedRegisters];
  DISALLOW_COPY_AND_ASSIGN(SlowPathCode);
};

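// A minimal sketch of how a backend typically uses SlowPathCode (illustrative
// only; `MySlowPath`, `locations` and the elided assembler work are
// placeholders, not part of this interface):
//
//   class MySlowPath : public SlowPathCode {
//    public:
//     void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
//       SaveLiveRegisters(codegen, locations);     // Spill values live across the call.
//       // ... emit the out-of-line runtime call and record its pc info ...
//       RestoreLiveRegisters(codegen, locations);  // Reload the spilled values.
//       // ... branch back to the fast path ...
//     }
//   };
//
// Slow paths are registered with the code generator through AddSlowPath() and
// their code is emitted after the method's main body.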

class InvokeDexCallingConventionVisitor {
 public:
  virtual Location GetNextLocation(Primitive::Type type) = 0;

 protected:
  InvokeDexCallingConventionVisitor() {}
  virtual ~InvokeDexCallingConventionVisitor() {}

  // The current index for core registers.
  uint32_t gp_index_ = 0u;
  // The current index for floating-point registers.
  uint32_t float_index_ = 0u;
  // The current stack index.
  uint32_t stack_index_ = 0u;

 private:
  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
};

class CodeGenerator {
 public:
  // Compiles the graph to executable instructions.
  void CompileBaseline(CodeAllocator* allocator, bool is_leaf = false);
  void CompileOptimized(CodeAllocator* allocator);
  static CodeGenerator* Create(HGraph* graph,
                               InstructionSet instruction_set,
                               const InstructionSetFeatures& isa_features,
                               const CompilerOptions& compiler_options);
  virtual ~CodeGenerator() {}

  HGraph* GetGraph() const { return graph_; }

  HBasicBlock* GetNextBlockToEmit() const;
  HBasicBlock* FirstNonEmptyBlock(HBasicBlock* block) const;
  bool GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const;

  size_t GetStackSlotOfParameter(HParameterValue* parameter) const {
    // Note that this follows the current calling convention.
    return GetFrameSize()
        + InstructionSetPointerSize(GetInstructionSet())  // Art method
        + parameter->GetIndex() * kVRegSize;
  }

  virtual void Initialize() = 0;
  virtual void Finalize(CodeAllocator* allocator);
  virtual void GenerateFrameEntry() = 0;
  virtual void GenerateFrameExit() = 0;
  virtual void Bind(HBasicBlock* block) = 0;
  virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) = 0;
  virtual Assembler* GetAssembler() = 0;
  virtual size_t GetWordSize() const = 0;
  virtual size_t GetFloatingPointSpillSlotSize() const = 0;
  virtual uintptr_t GetAddressOf(HBasicBlock* block) const = 0;
  void InitializeCodeGeneration(size_t number_of_spill_slots,
                                size_t maximum_number_of_live_core_registers,
                                size_t maximum_number_of_live_fp_registers,
                                size_t number_of_out_slots,
                                const GrowableArray<HBasicBlock*>& block_order);
  int32_t GetStackSlot(HLocal* local) const;
  Location GetTemporaryLocation(HTemporary* temp) const;

  uint32_t GetFrameSize() const { return frame_size_; }
  void SetFrameSize(uint32_t size) { frame_size_ = size; }
  uint32_t GetCoreSpillMask() const { return core_spill_mask_; }
  uint32_t GetFpuSpillMask() const { return fpu_spill_mask_; }

  size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; }
  size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; }
  virtual void SetupBlockedRegisters(bool is_baseline) const = 0;

  // Computes core_spill_mask_ and fpu_spill_mask_: the callee-save registers
  // that were actually allocated and therefore must be preserved in the frame.
  virtual void ComputeSpillMask() {
    core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
    DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
    fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
  }

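  // Builds a bit mask from a list of register codes; for example, registers
  // {0, 2, 5} yield the mask 0b100101 (0x25).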
  static uint32_t ComputeRegisterMask(const int* registers, size_t length) {
    uint32_t mask = 0;
    for (size_t i = 0, e = length; i < e; ++i) {
      mask |= (1 << registers[i]);
    }
    return mask;
  }

  virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0;
  virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0;
  virtual InstructionSet GetInstructionSet() const = 0;

  const CompilerOptions& GetCompilerOptions() const { return compiler_options_; }

  // Saves the register in the stack. Returns the size taken on stack.
  virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
  // Restores the register from the stack. Returns the size taken on stack.
  virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) = 0;

  virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;
  virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;

  virtual bool NeedsTwoRegisters(Primitive::Type type) const = 0;
  // Returns whether we should split long moves in parallel moves.
  virtual bool ShouldSplitLongMoves() const { return false; }

  bool IsCoreCalleeSaveRegister(int reg) const {
    return (core_callee_save_mask_ & (1 << reg)) != 0;
  }

  bool IsFloatingPointCalleeSaveRegister(int reg) const {
    return (fpu_callee_save_mask_ & (1 << reg)) != 0;
  }

  void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr);
  bool CanMoveNullCheckToUser(HNullCheck* null_check);
  void MaybeRecordImplicitNullCheck(HInstruction* instruction);

  void AddSlowPath(SlowPathCode* slow_path) {
    slow_paths_.Add(slow_path);
  }

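  // Helpers that serialize the method metadata accompanying the generated code
  // (source map, dex<->native pc mapping table, vreg map, native GC map and
  // stack maps) into the given buffers.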
  void BuildSourceMap(DefaultSrcMap* src_map) const;
  void BuildMappingTable(std::vector<uint8_t>* vector) const;
  void BuildVMapTable(std::vector<uint8_t>* vector) const;
  void BuildNativeGCMap(
      std::vector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const;
  void BuildStackMaps(std::vector<uint8_t>* vector);

  bool IsBaseline() const {
    return is_baseline_;
  }

  bool IsLeafMethod() const {
    return is_leaf_;
  }

  void MarkNotLeaf() {
    is_leaf_ = false;
    requires_current_method_ = true;
  }

  void SetRequiresCurrentMethod() {
    requires_current_method_ = true;
  }

  bool RequiresCurrentMethod() const {
    return requires_current_method_;
  }

  // Clears the spill slots taken by loop phis in the `LocationSummary` of the
  // suspend check. This is called when the code generator generates code
  // for the suspend check at the back edge (instead of where the suspend check
  // is, which is the loop entry). At this point, the spill slots for the phis
  // have not been written to.
  void ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check) const;

  bool* GetBlockedCoreRegisters() const { return blocked_core_registers_; }
  bool* GetBlockedFloatingPointRegisters() const { return blocked_fpu_registers_; }

  // Helper that returns the pointer offset of an index in an object array.
  // Note: this method assumes we always have the same pointer size, regardless
  // of the architecture.
  static size_t GetCacheOffset(uint32_t index);
  // Pointer variant for ArtMethod and ArtField arrays.
  size_t GetCachePointerOffset(uint32_t index);

  void EmitParallelMoves(Location from1,
                         Location to1,
                         Primitive::Type type1,
                         Location from2,
                         Location to2,
                         Primitive::Type type2);

  static bool StoreNeedsWriteBarrier(Primitive::Type type, HInstruction* value) {
    // Check that null value is not represented as an integer constant.
    DCHECK(type != Primitive::kPrimNot || !value->IsIntConstant());
    return type == Primitive::kPrimNot && !value->IsNullConstant();
  }

  void AddAllocatedRegister(Location location) {
    allocated_registers_.Add(location);
  }

  void AllocateLocations(HInstruction* instruction);

  // Tells whether the stack frame of the compiled method is
  // considered "empty", that is either actually having a size of zero,
  // or just containing the saved return address register.
  bool HasEmptyFrame() const {
    return GetFrameSize() == (CallPushesPC() ? GetWordSize() : 0);
  }

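  // Helpers returning the raw integer representation of a constant: null maps
  // to 0, and float/double constants are returned as their IEEE-754 bit
  // patterns (e.g. 1.0f maps to 0x3f800000).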
  static int32_t GetInt32ValueOf(HConstant* constant) {
    if (constant->IsIntConstant()) {
      return constant->AsIntConstant()->GetValue();
    } else if (constant->IsNullConstant()) {
      return 0;
    } else {
      DCHECK(constant->IsFloatConstant());
      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
    }
  }

  static int64_t GetInt64ValueOf(HConstant* constant) {
    if (constant->IsIntConstant()) {
      return constant->AsIntConstant()->GetValue();
    } else if (constant->IsNullConstant()) {
      return 0;
    } else if (constant->IsFloatConstant()) {
      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
    } else if (constant->IsLongConstant()) {
      return constant->AsLongConstant()->GetValue();
    } else {
      DCHECK(constant->IsDoubleConstant());
      return bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
    }
  }

  size_t GetFirstRegisterSlotInSlowPath() const {
    return first_register_slot_in_slow_path_;
  }

  uint32_t FrameEntrySpillSize() const {
    return GetFpuSpillSize() + GetCoreSpillSize();
  }

  virtual ParallelMoveResolver* GetMoveResolver() = 0;

 protected:
  CodeGenerator(HGraph* graph,
                size_t number_of_core_registers,
                size_t number_of_fpu_registers,
                size_t number_of_register_pairs,
                uint32_t core_callee_save_mask,
                uint32_t fpu_callee_save_mask,
                const CompilerOptions& compiler_options)
      : frame_size_(0),
        core_spill_mask_(0),
        fpu_spill_mask_(0),
        first_register_slot_in_slow_path_(0),
        blocked_core_registers_(graph->GetArena()->AllocArray<bool>(number_of_core_registers)),
        blocked_fpu_registers_(graph->GetArena()->AllocArray<bool>(number_of_fpu_registers)),
        blocked_register_pairs_(graph->GetArena()->AllocArray<bool>(number_of_register_pairs)),
        number_of_core_registers_(number_of_core_registers),
        number_of_fpu_registers_(number_of_fpu_registers),
        number_of_register_pairs_(number_of_register_pairs),
        core_callee_save_mask_(core_callee_save_mask),
        fpu_callee_save_mask_(fpu_callee_save_mask),
        is_baseline_(false),
        graph_(graph),
        compiler_options_(compiler_options),
        pc_infos_(graph->GetArena(), 32),
        slow_paths_(graph->GetArena(), 8),
        block_order_(nullptr),
        current_block_index_(0),
        is_leaf_(true),
        requires_current_method_(false),
        stack_map_stream_(graph->GetArena()) {}

  // Register allocation logic.
  void AllocateRegistersLocally(HInstruction* instruction) const;

  // Backend specific implementation for allocating a register.
  virtual Location AllocateFreeRegister(Primitive::Type type) const = 0;

  static size_t FindFreeEntry(bool* array, size_t length);
  static size_t FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t length);

  virtual Location GetStackLocation(HLoadLocal* load) const = 0;

  virtual HGraphVisitor* GetLocationBuilder() = 0;
  virtual HGraphVisitor* GetInstructionVisitor() = 0;

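  // The spill-area helpers below relate as follows (illustrative values only):
  // with a 64-byte frame, two core callee-saves on a 4-byte-word target and one
  // 8-byte floating-point spill, GetCoreSpillSize() == 8, GetFpuSpillSize() == 8,
  // FrameEntrySpillSize() == 16 and GetFpuSpillStart() == 48.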
  // Returns the location of the first spilled entry for floating point registers,
  // relative to the stack pointer.
  uint32_t GetFpuSpillStart() const {
    return GetFrameSize() - FrameEntrySpillSize();
  }

  uint32_t GetFpuSpillSize() const {
    return POPCOUNT(fpu_spill_mask_) * GetFloatingPointSpillSlotSize();
  }

  uint32_t GetCoreSpillSize() const {
    return POPCOUNT(core_spill_mask_) * GetWordSize();
  }

  bool HasAllocatedCalleeSaveRegisters() const {
    // We check the core registers against 1 because it always comprises the return PC.
    return (POPCOUNT(allocated_registers_.GetCoreRegisters() & core_callee_save_mask_) != 1)
        || (POPCOUNT(allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_) != 0);
  }

  bool CallPushesPC() const {
    InstructionSet instruction_set = GetInstructionSet();
    return instruction_set == kX86 || instruction_set == kX86_64;
  }

  // Arm64 has its own type for a label, so we need to templatize this method
  // to share the logic.
  template <typename T>
  T* CommonGetLabelOf(T* raw_pointer_to_labels_array, HBasicBlock* block) const {
    block = FirstNonEmptyBlock(block);
    return raw_pointer_to_labels_array + block->GetBlockId();
  }

  // Frame size required for this method.
  uint32_t frame_size_;
  uint32_t core_spill_mask_;
  uint32_t fpu_spill_mask_;
  uint32_t first_register_slot_in_slow_path_;

  // Registers that were allocated during linear scan.
  RegisterSet allocated_registers_;

  // Arrays used when doing register allocation to know which
  // registers we can allocate. `SetupBlockedRegisters` updates the
  // arrays.
  bool* const blocked_core_registers_;
  bool* const blocked_fpu_registers_;
  bool* const blocked_register_pairs_;
  size_t number_of_core_registers_;
  size_t number_of_fpu_registers_;
  size_t number_of_register_pairs_;
  const uint32_t core_callee_save_mask_;
  const uint32_t fpu_callee_save_mask_;

  // Whether we are using baseline.
  bool is_baseline_;

 private:
  void InitLocationsBaseline(HInstruction* instruction);
  size_t GetStackOffsetOfSavedRegister(size_t index);
  void CompileInternal(CodeAllocator* allocator, bool is_baseline);
  void BlockIfInRegister(Location location, bool is_out = false) const;

  HGraph* const graph_;
  const CompilerOptions& compiler_options_;

  GrowableArray<PcInfo> pc_infos_;
  GrowableArray<SlowPathCode*> slow_paths_;

  // The order to use for code generation.
  const GrowableArray<HBasicBlock*>* block_order_;

  // The current block index in `block_order_` of the block
  // we are generating code for.
  size_t current_block_index_;

  // Whether the method is a leaf method.
  bool is_leaf_;

  // Whether an instruction in the graph accesses the current method.
  bool requires_current_method_;

  StackMapStream stack_map_stream_;

  friend class OptimizingCFITest;

  DISALLOW_COPY_AND_ASSIGN(CodeGenerator);
};

template <typename C, typename F>
class CallingConvention {
 public:
  CallingConvention(const C* registers,
                    size_t number_of_registers,
                    const F* fpu_registers,
                    size_t number_of_fpu_registers,
                    size_t pointer_size)
      : registers_(registers),
        number_of_registers_(number_of_registers),
        fpu_registers_(fpu_registers),
        number_of_fpu_registers_(number_of_fpu_registers),
        pointer_size_(pointer_size) {}

  size_t GetNumberOfRegisters() const { return number_of_registers_; }
  size_t GetNumberOfFpuRegisters() const { return number_of_fpu_registers_; }

  C GetRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_registers_);
    return registers_[index];
  }

  F GetFpuRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_fpu_registers_);
    return fpu_registers_[index];
  }

  size_t GetStackOffsetOf(size_t index) const {
    // We still reserve the space for parameters passed by registers.
    // Add space for the method pointer.
    return pointer_size_ + index * kVRegSize;
  }

 private:
  const C* registers_;
  const size_t number_of_registers_;
  const F* fpu_registers_;
  const size_t number_of_fpu_registers_;
  const size_t pointer_size_;

  DISALLOW_COPY_AND_ASSIGN(CallingConvention);
};

}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_