/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_H_

#include "arch/x86/instruction_set_features_x86.h"
#include "code_generator.h"
#include "dex/compiler_enums.h"
#include "driver/compiler_options.h"
#include "nodes.h"
#include "parallel_move_resolver.h"
#include "utils/x86/assembler_x86.h"

namespace art {
namespace x86 {

// Use a local definition to prevent copying mistakes.
static constexpr size_t kX86WordSize = kX86PointerSize;

class CodeGeneratorX86;

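// Core registers available for managed (dex) argument passing. EAX is deliberately
// absent: by ART's x86 convention it carries the ArtMethod* on entry. 64-bit arguments
// occupy the adjacent pairs in kParameterCorePairRegisters.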
static constexpr Register kParameterCoreRegisters[] = { ECX, EDX, EBX };
static constexpr RegisterPair kParameterCorePairRegisters[] = { ECX_EDX, EDX_EBX };
static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
static constexpr XmmRegister kParameterFpuRegisters[] = { XMM0, XMM1, XMM2, XMM3 };
static constexpr size_t kParameterFpuRegistersLength = arraysize(kParameterFpuRegisters);

static constexpr Register kRuntimeParameterCoreRegisters[] = { EAX, ECX, EDX, EBX };
static constexpr size_t kRuntimeParameterCoreRegistersLength =
    arraysize(kRuntimeParameterCoreRegisters);
static constexpr XmmRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1, XMM2, XMM3 };
static constexpr size_t kRuntimeParameterFpuRegistersLength =
    arraysize(kRuntimeParameterFpuRegisters);

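// Calling convention used for calls into quick runtime entry points. Unlike the managed
// convention above, EAX is available as the first argument register here.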
class InvokeRuntimeCallingConvention : public CallingConvention<Register, XmmRegister> {
 public:
  InvokeRuntimeCallingConvention()
      : CallingConvention(kRuntimeParameterCoreRegisters,
                          kRuntimeParameterCoreRegistersLength,
                          kRuntimeParameterFpuRegisters,
                          kRuntimeParameterFpuRegistersLength,
                          kX86PointerSize) {}

 private:
  DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention);
};

class InvokeDexCallingConvention : public CallingConvention<Register, XmmRegister> {
 public:
  InvokeDexCallingConvention() : CallingConvention(
      kParameterCoreRegisters,
      kParameterCoreRegistersLength,
      kParameterFpuRegisters,
      kParameterFpuRegistersLength,
      kX86PointerSize) {}

  RegisterPair GetRegisterPairAt(size_t argument_index) {
    DCHECK_LT(argument_index + 1, GetNumberOfRegisters());
    return kParameterCorePairRegisters[argument_index];
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
};

class InvokeDexCallingConventionVisitorX86 : public InvokeDexCallingConventionVisitor {
 public:
  InvokeDexCallingConventionVisitorX86() {}
  virtual ~InvokeDexCallingConventionVisitorX86() {}

  Location GetNextLocation(Primitive::Type type) OVERRIDE;
  Location GetReturnLocation(Primitive::Type type) const OVERRIDE;
  Location GetMethodLocation() const OVERRIDE;

 private:
  InvokeDexCallingConvention calling_convention;

  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorX86);
};

class FieldAccessCallingConventionX86 : public FieldAccessCallingConvention {
 public:
  FieldAccessCallingConventionX86() {}

  Location GetObjectLocation() const OVERRIDE {
    return Location::RegisterLocation(ECX);
  }
  Location GetFieldIndexLocation() const OVERRIDE {
    return Location::RegisterLocation(EAX);
  }
  Location GetReturnLocation(Primitive::Type type) const OVERRIDE {
    return Primitive::Is64BitType(type)
        ? Location::RegisterPairLocation(EAX, EDX)
        : Location::RegisterLocation(EAX);
  }
  Location GetSetValueLocation(Primitive::Type type, bool is_instance) const OVERRIDE {
    return Primitive::Is64BitType(type)
        ? Location::RegisterPairLocation(EDX, EBX)
        : (is_instance
            ? Location::RegisterLocation(EDX)
            : Location::RegisterLocation(ECX));
  }
  Location GetFpuLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
    return Location::FpuRegisterLocation(XMM0);
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConventionX86);
};

class ParallelMoveResolverX86 : public ParallelMoveResolverWithSwap {
 public:
  ParallelMoveResolverX86(ArenaAllocator* allocator, CodeGeneratorX86* codegen)
      : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {}

  void EmitMove(size_t index) OVERRIDE;
  void EmitSwap(size_t index) OVERRIDE;
  void SpillScratch(int reg) OVERRIDE;
  void RestoreScratch(int reg) OVERRIDE;

  X86Assembler* GetAssembler() const;

 private:
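  // Low-level helpers for EmitMove/EmitSwap; the `int` parameters name stack slots by
  // their byte offset.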
  void Exchange(Register reg, int mem);
  void Exchange(int mem1, int mem2);
  void Exchange32(XmmRegister reg, int mem);
  void MoveMemoryToMemory32(int dst, int src);
  void MoveMemoryToMemory64(int dst, int src);

  CodeGeneratorX86* const codegen_;

  DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverX86);
};

class LocationsBuilderX86 : public HGraphVisitor {
 public:
  LocationsBuilderX86(HGraph* graph, CodeGeneratorX86* codegen)
      : HGraphVisitor(graph), codegen_(codegen) {}

#define DECLARE_VISIT_INSTRUCTION(name, super)     \
  void Visit##name(H##name* instr) OVERRIDE;

  FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_X86(DECLARE_VISIT_INSTRUCTION)

#undef DECLARE_VISIT_INSTRUCTION

  void VisitInstruction(HInstruction* instruction) OVERRIDE {
    LOG(FATAL) << "Unreachable instruction " << instruction->DebugName()
               << " (id " << instruction->GetId() << ")";
  }

 private:
  void HandleBitwiseOperation(HBinaryOperation* instruction);
  void HandleInvoke(HInvoke* invoke);
  void HandleCondition(HCondition* condition);
  void HandleShift(HBinaryOperation* instruction);
  void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
  void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);

  CodeGeneratorX86* const codegen_;
  InvokeDexCallingConventionVisitorX86 parameter_visitor_;

  DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86);
};

class InstructionCodeGeneratorX86 : public InstructionCodeGenerator {
 public:
  InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen);

#define DECLARE_VISIT_INSTRUCTION(name, super)     \
  void Visit##name(H##name* instr) OVERRIDE;

  FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_X86(DECLARE_VISIT_INSTRUCTION)

#undef DECLARE_VISIT_INSTRUCTION

  void VisitInstruction(HInstruction* instruction) OVERRIDE {
    LOG(FATAL) << "Unreachable instruction " << instruction->DebugName()
               << " (id " << instruction->GetId() << ")";
  }

  X86Assembler* GetAssembler() const { return assembler_; }

  // The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump
  // table version generates 7 instructions and num_entries literals. The compare/jump
  // sequence generates less code/data for a small num_entries.
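  // For example, with num_entries == 5 the compare/jump form costs about 7.5 instructions
  // (1.5 * 5), versus 7 instructions plus 5 literals for the table, so 5 is roughly the
  // break-even point.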
  static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;

 private:
  // Generate code for the given suspend check. If `successor` is not null, it is the block
  // to branch to when the suspend check is not needed, and after the suspend call.
  void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
  void GenerateClassInitializationCheck(SlowPathCode* slow_path, Register class_reg);
  void HandleBitwiseOperation(HBinaryOperation* instruction);
  void GenerateDivRemIntegral(HBinaryOperation* instruction);
  void DivRemOneOrMinusOne(HBinaryOperation* instruction);
  void DivByPowerOfTwo(HDiv* instruction);
  void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
  void GenerateRemFP(HRem* rem);
  void HandleCondition(HCondition* condition);
  void HandleShift(HBinaryOperation* instruction);
  void GenerateShlLong(const Location& loc, Register shifter);
  void GenerateShrLong(const Location& loc, Register shifter);
  void GenerateUShrLong(const Location& loc, Register shifter);
  void GenerateShlLong(const Location& loc, int shift);
  void GenerateShrLong(const Location& loc, int shift);
  void GenerateUShrLong(const Location& loc, int shift);

  void HandleFieldSet(HInstruction* instruction,
                      const FieldInfo& field_info,
                      bool value_can_be_null);
  void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);

  // Generate a heap reference load using one register `out`:
  //
  //   out <- *(out + offset)
  //
  // while honoring heap poisoning and/or read barriers (if any).
  //
  // Location `maybe_temp` is used when generating a read barrier and
  // shall be a register in that case; it may be an invalid location
  // otherwise.
  void GenerateReferenceLoadOneRegister(HInstruction* instruction,
                                        Location out,
                                        uint32_t offset,
                                        Location maybe_temp);
  // Generate a heap reference load using two different registers
  // `out` and `obj`:
  //
  //   out <- *(obj + offset)
  //
  // while honoring heap poisoning and/or read barriers (if any).
  //
  // Location `maybe_temp` is used when generating a Baker's (fast
  // path) read barrier and shall be a register in that case; it may
  // be an invalid location otherwise.
  void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
                                         Location out,
                                         Location obj,
                                         uint32_t offset,
                                         Location maybe_temp);
  // Generate a GC root reference load:
  //
  //   root <- *address
  //
  // while honoring read barriers (if any).
  void GenerateGcRootFieldLoad(HInstruction* instruction,
                               Location root,
                               const Address& address,
                               Label* fixup_label = nullptr);

  // Push value to FPU stack. `is_fp` specifies whether the value is floating point or not.
  // `is_wide` specifies whether it is long/double or not.
  void PushOntoFPStack(Location source, uint32_t temp_offset,
                       uint32_t stack_adjustment, bool is_fp, bool is_wide);

  template<class LabelType>
  void GenerateTestAndBranch(HInstruction* instruction,
                             size_t condition_input_index,
                             LabelType* true_target,
                             LabelType* false_target);
  template<class LabelType>
  void GenerateCompareTestAndBranch(HCondition* condition,
                                    LabelType* true_target,
                                    LabelType* false_target);
  template<class LabelType>
  void GenerateFPJumps(HCondition* cond, LabelType* true_label, LabelType* false_label);
  template<class LabelType>
  void GenerateLongComparesAndJumps(HCondition* cond,
                                    LabelType* true_label,
                                    LabelType* false_label);

  void HandleGoto(HInstruction* got, HBasicBlock* successor);
  void GenPackedSwitchWithCompares(Register value_reg,
                                   int32_t lower_bound,
                                   uint32_t num_entries,
                                   HBasicBlock* switch_block,
                                   HBasicBlock* default_block);

  void GenerateFPCompare(Location lhs, Location rhs, HInstruction* insn, bool is_double);
  void GenerateIntCompare(Location lhs, Location rhs);

  X86Assembler* const assembler_;
  CodeGeneratorX86* const codegen_;

  DISALLOW_COPY_AND_ASSIGN(InstructionCodeGeneratorX86);
};

class JumpTableRIPFixup;

class CodeGeneratorX86 : public CodeGenerator {
 public:
  CodeGeneratorX86(HGraph* graph,
                   const X86InstructionSetFeatures& isa_features,
                   const CompilerOptions& compiler_options,
                   OptimizingCompilerStats* stats = nullptr);
  virtual ~CodeGeneratorX86() {}

  void GenerateFrameEntry() OVERRIDE;
  void GenerateFrameExit() OVERRIDE;
  void Bind(HBasicBlock* block) OVERRIDE;
  void MoveConstant(Location destination, int32_t value) OVERRIDE;
  void MoveLocation(Location dst, Location src, Primitive::Type dst_type) OVERRIDE;
  void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE;

  size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
  size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
  size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
  size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;

  // Generate code to invoke a runtime entry point.
  void InvokeRuntime(QuickEntrypointEnum entrypoint,
                     HInstruction* instruction,
                     uint32_t dex_pc,
                     SlowPathCode* slow_path) OVERRIDE;

  void InvokeRuntime(int32_t entry_point_offset,
                     HInstruction* instruction,
                     uint32_t dex_pc,
                     SlowPathCode* slow_path);

  size_t GetWordSize() const OVERRIDE {
    return kX86WordSize;
  }

  size_t GetFloatingPointSpillSlotSize() const OVERRIDE {
    // 8 bytes == 2 words for each spill.
    return 2 * kX86WordSize;
  }

  HGraphVisitor* GetLocationBuilder() OVERRIDE {
    return &location_builder_;
  }

  HGraphVisitor* GetInstructionVisitor() OVERRIDE {
    return &instruction_visitor_;
  }

  X86Assembler* GetAssembler() OVERRIDE {
    return &assembler_;
  }

  const X86Assembler& GetAssembler() const OVERRIDE {
    return assembler_;
  }

  uintptr_t GetAddressOf(HBasicBlock* block) OVERRIDE {
    return GetLabelOf(block)->Position();
  }

  void SetupBlockedRegisters() const OVERRIDE;

  void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
  void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;

  // Blocks all register pairs made out of blocked core registers.
  void UpdateBlockedPairRegisters() const;

  ParallelMoveResolverX86* GetMoveResolver() OVERRIDE {
    return &move_resolver_;
  }

  InstructionSet GetInstructionSet() const OVERRIDE {
    return InstructionSet::kX86;
  }

  // Helper method to move a 32-bit value between two locations.
  void Move32(Location destination, Location source);
  // Helper method to move a 64-bit value between two locations.
  void Move64(Location destination, Location source);

  // Check if the desired_string_load_kind is supported. If it is, return it,
  // otherwise return a fall-back kind that should be used instead.
  HLoadString::LoadKind GetSupportedLoadStringKind(
      HLoadString::LoadKind desired_string_load_kind) OVERRIDE;

  // Check if the desired_dispatch_info is supported. If it is, return it,
  // otherwise return a fall-back info that should be used instead.
  HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
      MethodReference target_method) OVERRIDE;

  // Generate a call to a static or direct method.
  void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
  // Generate a call to a virtual method.
  void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;

  void RecordSimplePatch();
  void RecordStringPatch(HLoadString* load_string);
  Label* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset);

  void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE;

  // Emit linker patches.
  void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;

  // Emit a write barrier.
  void MarkGCCard(Register temp,
                  Register card,
                  Register object,
                  Register value,
                  bool value_can_be_null);

  void GenerateMemoryBarrier(MemBarrierKind kind);

  Label* GetLabelOf(HBasicBlock* block) const {
    return CommonGetLabelOf<Label>(block_labels_, block);
  }

  void Initialize() OVERRIDE {
    block_labels_ = CommonInitializeLabels<Label>();
  }

  bool NeedsTwoRegisters(Primitive::Type type) const OVERRIDE {
    return type == Primitive::kPrimLong;
  }

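  // Longs live in register pairs on x86 (see NeedsTwoRegisters above), which presumably
  // makes 64-bit moves easier to resolve as two 32-bit halves.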
  bool ShouldSplitLongMoves() const OVERRIDE { return true; }

  Label* GetFrameEntryLabel() { return &frame_entry_label_; }

  const X86InstructionSetFeatures& GetInstructionSetFeatures() const {
    return isa_features_;
  }

  void SetMethodAddressOffset(int32_t offset) {
    method_address_offset_ = offset;
  }

  int32_t GetMethodAddressOffset() const {
    return method_address_offset_;
  }

  int32_t ConstantAreaStart() const {
    return constant_area_start_;
  }

  Address LiteralDoubleAddress(double v, Register reg);
  Address LiteralFloatAddress(float v, Register reg);
  Address LiteralInt32Address(int32_t v, Register reg);
  Address LiteralInt64Address(int64_t v, Register reg);

  // Load a 32-bit value into a register in the most efficient manner.
  void Load32BitValue(Register dest, int32_t value);

  // Compare a register with a 32-bit value in the most efficient manner.
  void Compare32BitValue(Register dest, int32_t value);

  Address LiteralCaseTable(HX86PackedSwitch* switch_instr, Register reg, Register value);

  void Finalize(CodeAllocator* allocator) OVERRIDE;

  // Fast path implementation of ReadBarrier::Barrier for a heap
  // reference field load when Baker's read barriers are used.
  void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
                                             Location ref,
                                             Register obj,
                                             uint32_t offset,
                                             Location temp,
                                             bool needs_null_check);
  // Fast path implementation of ReadBarrier::Barrier for a heap
  // reference array load when Baker's read barriers are used.
  void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
                                             Location ref,
                                             Register obj,
                                             uint32_t data_offset,
                                             Location index,
                                             Location temp,
                                             bool needs_null_check);

  // Generate a read barrier for a heap reference within `instruction`
  // using a slow path.
  //
  // A read barrier for an object reference read from the heap is
  // implemented as a call to the artReadBarrierSlow runtime entry
  // point, which is passed the values in locations `ref`, `obj`, and
  // `offset`:
  //
  //   mirror::Object* artReadBarrierSlow(mirror::Object* ref,
  //                                      mirror::Object* obj,
  //                                      uint32_t offset);
  //
  // The `out` location contains the value returned by
  // artReadBarrierSlow.
  //
  // When `index` is provided (i.e. for array accesses), the offset
  // value passed to artReadBarrierSlow is adjusted to take `index`
  // into account.
  void GenerateReadBarrierSlow(HInstruction* instruction,
                               Location out,
                               Location ref,
                               Location obj,
                               uint32_t offset,
                               Location index = Location::NoLocation());

  // If read barriers are enabled, generate a read barrier for a heap
  // reference using a slow path. If heap poisoning is enabled, also
  // unpoison the reference in `out`.
  void MaybeGenerateReadBarrierSlow(HInstruction* instruction,
                                    Location out,
                                    Location ref,
                                    Location obj,
                                    uint32_t offset,
                                    Location index = Location::NoLocation());

  // Generate a read barrier for a GC root within `instruction` using
  // a slow path.
  //
  // A read barrier for an object reference GC root is implemented as
  // a call to the artReadBarrierForRootSlow runtime entry point,
  // which is passed the value in location `root`:
  //
  //   mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root);
  //
  // The `out` location contains the value returned by
  // artReadBarrierForRootSlow.
  void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);

  // Ensure that prior stores complete to memory before subsequent loads.
  // The locked add implementation will avoid serializing device memory, but will
  // touch (but not change) the top of the stack.
  // The 'non_temporal' parameter should be used to ensure ordering of non-temporal stores.
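  // (On x86 a locked read-modify-write acts as a full barrier for ordinary write-back
  // memory, but it does not order non-temporal stores; those need the mfence path below.)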
  void MemoryFence(bool non_temporal = false) {
    if (!non_temporal && isa_features_.PrefersLockedAddSynchronization()) {
      assembler_.lock()->addl(Address(ESP, 0), Immediate(0));
    } else {
      assembler_.mfence();
    }
  }

  void GenerateNop();
  void GenerateImplicitNullCheck(HNullCheck* instruction);
  void GenerateExplicitNullCheck(HNullCheck* instruction);

  // When we don't know the proper offset for the value, we use kDummy32BitOffset.
  // The correct value will be inserted when processing Assembler fixups.
  static constexpr int32_t kDummy32BitOffset = 256;
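  // Note: 256 does not fit in a signed byte, which presumably forces the assembler to emit
  // a 32-bit displacement that the fixup can later overwrite in place.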

 private:
  // Factored implementation of GenerateFieldLoadWithBakerReadBarrier
  // and GenerateArrayLoadWithBakerReadBarrier.
  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
                                                 Location ref,
                                                 Register obj,
                                                 const Address& src,
                                                 Location temp,
                                                 bool needs_null_check);

  Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);

  struct PcRelativeDexCacheAccessInfo {
    PcRelativeDexCacheAccessInfo(const DexFile& dex_file, uint32_t element_off)
        : target_dex_file(dex_file), element_offset(element_off), label() { }

    const DexFile& target_dex_file;
    uint32_t element_offset;
    // NOTE: Label is bound to the end of the instruction that has an embedded 32-bit offset.
    Label label;
  };

  // Labels for each block that will be compiled.
  Label* block_labels_;  // Indexed by block id.
  Label frame_entry_label_;
  LocationsBuilderX86 location_builder_;
  InstructionCodeGeneratorX86 instruction_visitor_;
  ParallelMoveResolverX86 move_resolver_;
  X86Assembler assembler_;
  const X86InstructionSetFeatures& isa_features_;

  // Method patch info. Using ArenaDeque<> which retains element addresses on push/emplace_back().
  ArenaDeque<MethodPatchInfo<Label>> method_patches_;
  ArenaDeque<MethodPatchInfo<Label>> relative_call_patches_;
  // PC-relative DexCache access info.
  ArenaDeque<PcRelativeDexCacheAccessInfo> pc_relative_dex_cache_patches_;
  // Patch locations for patchoat where the linker doesn't do any other work.
  ArenaDeque<Label> simple_patches_;
  // String patch locations.
  ArenaDeque<StringPatchInfo<Label>> string_patches_;

  // Offset to the start of the constant area in the assembled code.
  // Used for fixups to the constant area.
  int32_t constant_area_start_;

  // Fixups for jump tables that need to be patched after the constant table is generated.
  ArenaVector<JumpTableRIPFixup*> fixups_to_jump_tables_;

  // If there is a HX86ComputeBaseMethodAddress instruction in the graph
  // (which shall be the sole instruction of this kind), subtracting this offset
  // from the value contained in the out register of this HX86ComputeBaseMethodAddress
  // instruction gives the address of the start of this method.
  int32_t method_address_offset_;

  DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86);
};

}  // namespace x86
}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_H_